Fix incorrect zero-extension assumptions in x86_64 asm

author Henrik Gramner <hengar-6@student.ltu.se>
Wed, 1 Feb 2012 22:52:48 +0000 (23:52 +0100)

committer Fiona Glaser <fiona@x264.com>
Tue, 6 Mar 2012 18:37:53 +0000 (10:37 -0800)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S

index 8ed0a2279ce2e7bd96eae0030c0f975081d65fd3..507bbba158faaa61cf46df3a4341d0748ce6140b 100644 (file)
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -32,7 +32,7 @@
  // note: prefetch stuff assumes 64-byte cacheline, true for the Cortex-A8
  // They also use nothing above armv5te, but we don't care about pre-armv6
  
-// void prefetch_ref( uint8_t *pix, int stride, int parity )
+// void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
  function x264_prefetch_ref_arm
      sub         r2, r2, #1
      add         r0, r0, #64
@@ -51,8 +51,8 @@ function x264_prefetch_ref_arm
      bx          lr
  .endfunc
  
-// void prefetch_fenc( uint8_t *pix_y, int stride_y,
-//                     uint8_t *pix_uv, int stride_uv, int mb_x )
+// void prefetch_fenc( uint8_t *pix_y,  intptr_t stride_y,
+//                     uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
  function x264_prefetch_fenc_arm
      ldr         ip, [sp]
      push        {lr}
@@ -78,7 +78,7 @@ function x264_prefetch_fenc_arm
  .endfunc
  
  
-// void *x264_memcpy_aligned( void * dst, const void * src, size_t n )
+// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
  function x264_memcpy_aligned_neon
      orr         r3,  r0,  r1,  lsr #1
      movrel      ip,  memcpy_table
@@ -158,9 +158,9 @@ memzero_loop:
  .endfunc
  
  
-// void pixel_avg( uint8_t *dst, int dst_stride,
-//                 uint8_t *src1, int src1_stride,
-//                 uint8_t *src2, int src2_stride, int weight );
+// void pixel_avg( uint8_t *dst,  intptr_t dst_stride,
+//                 uint8_t *src1, intptr_t src1_stride,
+//                 uint8_t *src2, intptr_t src2_stride, int weight );
  .macro AVGH w h
  function x264_pixel_avg_\w\()x\h\()_neon
      ldr         ip, [sp, #8]
@@ -455,7 +455,7 @@ avg2_w20_loop:
  .endif
  .endm
  
-// void mc_weight( uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+// void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, intptr_t dst_stride,
  //                 const x264_weight_t *weight, int height )
  function x264_mc_weight_w20_neon
      weight_prologue full
@@ -744,7 +744,7 @@ weight_simple offsetadd, vqadd.u8
  weight_simple offsetsub, vqsub.u8
  
  
-// void mc_copy( uint8_t *dst, int dst_stride, uint8_t *src, int src_stride, int height )
+// void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height )
  function x264_mc_copy_w4_neon
      ldr         ip,  [sp]
  copy_w4_loop:
@@ -810,8 +810,8 @@ copy_w16_aligned_loop:
  .endfunc
  
  
-// void x264_mc_chroma_neon( uint8_t *dst, int i_dst_stride,
-//                           uint8_t *src, int i_src_stride,
+// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
+//                           uint8_t *src, intptr_t i_src_stride,
  //                           int dx, int dy, int i_width, int i_height );
  function x264_mc_chroma_neon
      push            {r4-r6, lr}
@@ -1052,7 +1052,7 @@ mc_chroma_w8:
  .endfunc
  
  
-// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width)
+// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
  function x264_hpel_filter_v_neon
      ldr             ip,  [sp]
      sub             r1,  r1,  r3,  lsl #1
@@ -1266,7 +1266,7 @@ filter_h_loop:
  
  
  // frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
-//                         uint8_t *dstc, int src_stride, int dst_stride, int width,
+//                         uint8_t *dstc, intptr_t src_stride, intptr_t dst_stride, int width,
  //                         int height )
  function x264_frame_init_lowres_core_neon
      push            {r4-r10,lr}
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c

index de4aec4e8f68249534876e4cc1321ce0d4928f85..1148ae76f6284fd0516876e5c7cf0f444d5472a0 100644 (file)
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -26,33 +26,33 @@
  #include "common/common.h"
  #include "mc.h"
  
-void x264_prefetch_ref_arm( uint8_t *, int, int );
-void x264_prefetch_fenc_arm( uint8_t *, int, uint8_t *, int, int );
-
-void *x264_memcpy_aligned_neon( void * dst, const void * src, size_t n );
-void x264_memzero_aligned_neon( void *dst, int n );
-
-void x264_pixel_avg_16x16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_16x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_8x16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_8x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_8x4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_4x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_4x4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-void x264_pixel_avg_4x2_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-
-void x264_pixel_avg2_w4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
-void x264_pixel_avg2_w8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
-void x264_pixel_avg2_w16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
-void x264_pixel_avg2_w20_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+void x264_prefetch_ref_arm( uint8_t *, intptr_t, int );
+void x264_prefetch_fenc_arm( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+
+void *x264_memcpy_aligned_neon( void *dst, const void *src, size_t n );
+void x264_memzero_aligned_neon( void *dst, size_t n );
+
+void x264_pixel_avg_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_16x8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_8x16_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_8x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_4x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_4x4_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_pixel_avg_4x2_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+
+void x264_pixel_avg2_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
  
  #define MC_WEIGHT(func)\
-void x264_mc_weight_w20##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
-void x264_mc_weight_w16##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
-void x264_mc_weight_w8##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
-void x264_mc_weight_w4##func##_neon( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int );\
+void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
+void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
+void x264_mc_weight_w8##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
+void x264_mc_weight_w4##func##_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
  \
-static void (* const x264_mc##func##_wtab_neon[6])( uint8_t *, int, uint8_t *, int, const x264_weight_t *, int ) =\
+static void (* const x264_mc##func##_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int ) =\
  {\
      x264_mc_weight_w4##func##_neon,\
      x264_mc_weight_w4##func##_neon,\
@@ -67,15 +67,15 @@ MC_WEIGHT(_nodenom)
  MC_WEIGHT(_offsetadd)
  MC_WEIGHT(_offsetsub)
  
-void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
-void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
+void x264_mc_copy_w4_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_mc_copy_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_mc_copy_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
+void x264_mc_copy_w16_aligned_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int );
  
-void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
+void x264_mc_chroma_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int, int, int, int );
+void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, intptr_t, int, int );
  
-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
+void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, intptr_t, int );
  void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
  void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
  
@@ -101,7 +101,7 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
          w->weightfn = x264_mc_wtab_neon;
  }
  
-static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
+static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int ) =
  {
      NULL,
      x264_pixel_avg2_w4_neon,
@@ -111,7 +111,7 @@ static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, in
      x264_pixel_avg2_w20_neon,
  };
  
-static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int, int ) =
+static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, intptr_t, int ) =
  {
      NULL,
      x264_mc_copy_w4_neon,
@@ -123,13 +123,13 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int,
  static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  
-static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
-                          uint8_t *src[4], int i_src_stride,
+static void mc_luma_neon( uint8_t *dst,    intptr_t i_dst_stride,
+                          uint8_t *src[4], intptr_t i_src_stride,
                            int mvx, int mvy,
                            int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
+    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
      uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
      if ( (mvy&3) == 3 )             // explict if() to force conditional add
          src1 += i_src_stride;
@@ -149,13 +149,13 @@ static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
          x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
  }
  
-static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
-                              uint8_t *src[4], int i_src_stride,
+static uint8_t *get_ref_neon( uint8_t *dst,   intptr_t *i_dst_stride,
+                              uint8_t *src[4], intptr_t i_src_stride,
                                int mvx, int mvy,
                                int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
+    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
      uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
      if ( (mvy&3) == 3 )             // explict if() to force conditional add
          src1 += i_src_stride;
@@ -183,9 +183,9 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
  }
  
  static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
-                              int stride, int width, int height, int16_t *buf )
+                              intptr_t stride, int width, int height, int16_t *buf )
  {
-    int realign = (intptr_t)src & 15;
+    intptr_t realign = (intptr_t)src & 15;
      src -= realign;
      dstv -= realign;
      dstc -= realign;
diff --git a/common/arm/pixel.h b/common/arm/pixel.h

index d0c90daef8ab445aa14511a9ebbd5406da0cb4dd..ba3901122b79993a652c6e5431bbb739f63bb046 100644 (file)
--- a/common/arm/pixel.h
+++ b/common/arm/pixel.h
@@ -39,11 +39,11 @@
      DECL_PIXELS( int, name, suffix, ( uint8_t *, int, uint8_t *, int ) )
  
  #define DECL_X4( name, suffix ) \
-    DECL_PIXELS( void, name##_x3, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int * ) )\
-    DECL_PIXELS( void, name##_x4, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int * ) )
+    DECL_PIXELS( void, name##_x3, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )\
+    DECL_PIXELS( void, name##_x4, suffix, ( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, intptr_t, int * ) )
  
-int x264_pixel_sad_4x4_armv6( uint8_t *, int, uint8_t *, int );
-int x264_pixel_sad_4x8_armv6( uint8_t *, int, uint8_t *, int );
+int x264_pixel_sad_4x4_armv6( uint8_t *, intptr_t, uint8_t *, intptr_t );
+int x264_pixel_sad_4x8_armv6( uint8_t *, intptr_t, uint8_t *, intptr_t );
  
  DECL_X1( sad, neon )
  DECL_X1( sad_aligned, neon )
@@ -52,21 +52,21 @@ DECL_X4( sad, neon )
  DECL_X1( satd, neon )
  DECL_X1( ssd, neon )
  
-int x264_pixel_sa8d_8x8_neon( uint8_t *, int, uint8_t *, int );
-int x264_pixel_sa8d_16x16_neon( uint8_t *, int, uint8_t *, int );
+int x264_pixel_sa8d_8x8_neon  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+int x264_pixel_sa8d_16x16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t );
  
-uint64_t x264_pixel_var_8x8_neon( uint8_t *, int );
-uint64_t x264_pixel_var_16x16_neon( uint8_t *, int );
-int x264_pixel_var2_8x8_neon( uint8_t *, int, uint8_t *, int, int * );
+uint64_t x264_pixel_var_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_var_16x16_neon( uint8_t *, intptr_t );
+int x264_pixel_var2_8x8_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
  
-uint64_t x264_pixel_hadamard_ac_8x8_neon( uint8_t *, int );
-uint64_t x264_pixel_hadamard_ac_8x16_neon( uint8_t *, int );
-uint64_t x264_pixel_hadamard_ac_16x8_neon( uint8_t *, int );
-uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, int );
+uint64_t x264_pixel_hadamard_ac_8x8_neon  ( uint8_t *, intptr_t );
+uint64_t x264_pixel_hadamard_ac_8x16_neon ( uint8_t *, intptr_t );
+uint64_t x264_pixel_hadamard_ac_16x8_neon ( uint8_t *, intptr_t );
+uint64_t x264_pixel_hadamard_ac_16x16_neon( uint8_t *, intptr_t );
  
-void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, int,
-                                      const uint8_t *, int,
-                                      int sums[2][4]);
+void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, intptr_t,
+                                      const uint8_t *, intptr_t,
+                                      int sums[2][4] );
  float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
  
  #endif
diff --git a/common/deblock.c b/common/deblock.c

index 922b076d2d68b411386b74887e1fa1b046a43581..51f0d7a8d22d136fe16880b0a4c98f32f597d046 100644 (file)
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -76,7 +76,7 @@ static const int8_t i_tc0_table[52+12*3][4] =
  #define tc0_table(x)   i_tc0_table[(x)+24]
  
  /* From ffmpeg */
-static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, int xstride, int alpha, int beta, int8_t tc0 )
+static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, intptr_t xstride, int alpha, int beta, int8_t tc0 )
  {
      int p2 = pix[-3*xstride];
      int p1 = pix[-2*xstride];
@@ -107,7 +107,7 @@ static ALWAYS_INLINE void deblock_edge_luma_c( pixel *pix, int xstride, int alph
          pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
      }
  }
-static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
+static inline void deblock_luma_c( pixel *pix, intptr_t xstride, intptr_t ystride, int alpha, int beta, int8_t *tc0 )
  {
      for( int i = 0; i < 4; i++ )
      {
@@ -120,21 +120,21 @@ static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alp
              deblock_edge_luma_c( pix, xstride, alpha, beta, tc0[i] );
      }
  }
-static void deblock_h_luma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_luma_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      for( int d = 0; d < 8; d++, pix += stride )
          deblock_edge_luma_c( pix, 1, alpha, beta, tc0[d>>1] );
  }
-static void deblock_v_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_v_luma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
  }
-static void deblock_h_luma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_luma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
  }
  
-static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, int xstride, int alpha, int beta, int8_t tc )
+static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, intptr_t xstride, int alpha, int beta, int8_t tc )
  {
      int p1 = pix[-2*xstride];
      int p0 = pix[-1*xstride];
@@ -148,7 +148,7 @@ static ALWAYS_INLINE void deblock_edge_chroma_c( pixel *pix, int xstride, int al
          pix[ 0*xstride] = x264_clip_pixel( q0 - delta );    /* q0' */
      }
  }
-static ALWAYS_INLINE void deblock_chroma_c( pixel *pix, int height, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
+static ALWAYS_INLINE void deblock_chroma_c( pixel *pix, int height, intptr_t xstride, intptr_t ystride, int alpha, int beta, int8_t *tc0 )
  {
      for( int i = 0; i < 4; i++ )
      {
@@ -163,24 +163,24 @@ static ALWAYS_INLINE void deblock_chroma_c( pixel *pix, int height, int xstride,
                  deblock_edge_chroma_c( pix, xstride, alpha, beta, tc0[i] );
      }
  }
-static void deblock_h_chroma_mbaff_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_chroma_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_chroma_c( pix, 1, 2, stride, alpha, beta, tc0 );
  }
-static void deblock_v_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_v_chroma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_chroma_c( pix, 2, stride, 2, alpha, beta, tc0 );
  }
-static void deblock_h_chroma_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_chroma_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_chroma_c( pix, 2, 2, stride, alpha, beta, tc0 );
  }
-static void deblock_h_chroma_422_c( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void deblock_h_chroma_422_c( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      deblock_chroma_c( pix, 4, 2, stride, alpha, beta, tc0 );
  }
  
-static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, int xstride, int alpha, int beta )
+static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, intptr_t xstride, int alpha, int beta )
  {
      int p2 = pix[-3*xstride];
      int p1 = pix[-2*xstride];
@@ -219,26 +219,26 @@ static ALWAYS_INLINE void deblock_edge_luma_intra_c( pixel *pix, int xstride, in
          }
      }
  }
-static inline void deblock_luma_intra_c( pixel *pix, int xstride, int ystride, int alpha, int beta )
+static inline void deblock_luma_intra_c( pixel *pix, intptr_t xstride, intptr_t ystride, int alpha, int beta )
  {
      for( int d = 0; d < 16; d++, pix += ystride )
          deblock_edge_luma_intra_c( pix, xstride, alpha, beta );
  }
-static void deblock_h_luma_intra_mbaff_c( pixel *pix, int ystride, int alpha, int beta )
+static void deblock_h_luma_intra_mbaff_c( pixel *pix, intptr_t ystride, int alpha, int beta )
  {
      for( int d = 0; d < 8; d++, pix += ystride )
          deblock_edge_luma_intra_c( pix, 1, alpha, beta );
  }
-static void deblock_v_luma_intra_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_v_luma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_luma_intra_c( pix, stride, 1, alpha, beta );
  }
-static void deblock_h_luma_intra_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_h_luma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_luma_intra_c( pix, 1, stride, alpha, beta );
  }
  
-static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, int xstride, int alpha, int beta )
+static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, intptr_t xstride, int alpha, int beta )
  {
      int p1 = pix[-2*xstride];
      int p0 = pix[-1*xstride];
@@ -251,25 +251,25 @@ static ALWAYS_INLINE void deblock_edge_chroma_intra_c( pixel *pix, int xstride,
          pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
      }
  }
-static ALWAYS_INLINE void deblock_chroma_intra_c( pixel *pix, int width, int height, int xstride, int ystride, int alpha, int beta )
+static ALWAYS_INLINE void deblock_chroma_intra_c( pixel *pix, int width, int height, intptr_t xstride, intptr_t ystride, int alpha, int beta )
  {
      for( int d = 0; d < height; d++, pix += ystride-2 )
          for( int e = 0; e < width; e++, pix++ )
              deblock_edge_chroma_intra_c( pix, xstride, alpha, beta );
  }
-static void deblock_h_chroma_intra_mbaff_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_h_chroma_intra_mbaff_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_chroma_intra_c( pix, 2, 4, 2, stride, alpha, beta );
  }
-static void deblock_v_chroma_intra_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_v_chroma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_chroma_intra_c( pix, 1, 16, stride, 2, alpha, beta );
  }
-static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_h_chroma_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_chroma_intra_c( pix, 2, 8, 2, stride, alpha, beta );
  }
-static void deblock_h_chroma_422_intra_c( pixel *pix, int stride, int alpha, int beta )
+static void deblock_h_chroma_422_intra_c( pixel *pix, intptr_t stride, int alpha, int beta )
  {
      deblock_chroma_intra_c( pix, 2, 16, 2, stride, alpha, beta );
  }
@@ -303,7 +303,8 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
      }
  }
  
-static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int a, int b, int b_chroma, x264_deblock_inter_t pf_inter )
+static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, intptr_t i_stride, uint8_t bS[4], int i_qp,
+                                        int a, int b, int b_chroma, x264_deblock_inter_t pf_inter )
  {
      int index_a = i_qp + a;
      int index_b = i_qp + b;
@@ -322,7 +323,8 @@ static ALWAYS_INLINE void deblock_edge( x264_t *h, pixel *pix, int i_stride, uin
      pf_inter( pix, i_stride, alpha, beta, tc );
  }
  
-static ALWAYS_INLINE void deblock_edge_intra( x264_t *h, pixel *pix, int i_stride, uint8_t bS[4], int i_qp, int a, int b, int b_chroma, x264_deblock_intra_t pf_intra )
+static ALWAYS_INLINE void deblock_edge_intra( x264_t *h, pixel *pix, intptr_t i_stride, uint8_t bS[4], int i_qp,
+                                              int a, int b, int b_chroma, x264_deblock_intra_t pf_intra )
  {
      int index_a = i_qp + a;
      int index_b = i_qp + b;
@@ -631,30 +633,30 @@ void x264_macroblock_deblock( x264_t *h )
  }
  
  #if HAVE_MMX
-void x264_deblock_v_luma_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_sse2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_422_avx ( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_v_luma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_luma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, int stride, int alpha, int beta );
+void x264_deblock_v_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_422_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_422_sse2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_422_avx ( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_v_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_luma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_luma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_v_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_v_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_422_intra_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
  void x264_deblock_strength_mmx2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                    int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe );
@@ -668,32 +670,32 @@ void x264_deblock_strength_avx  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X2
                                    int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe );
  
-void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, int stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_mbaff_sse2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_mbaff_avx ( pixel *pix, intptr_t stride, int alpha, int beta );
  #if ARCH_X86
-void x264_deblock_h_luma_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v8_luma_mmx2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_chroma_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, int stride, int alpha, int beta );
-void x264_deblock_v_chroma_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
-void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, int stride, int alpha, int beta );
+void x264_deblock_h_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v8_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_v8_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_v_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
+void x264_deblock_h_chroma_intra_mbaff_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
  
  #if HIGH_BIT_DEPTH
-void x264_deblock_v_luma_mmx2( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_v_luma_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
+void x264_deblock_v_luma_mmx2( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_luma_intra_mmx2( pixel *pix, intptr_t stride, int alpha, int beta );
  #else
  // FIXME this wrapper has a significant cpu cost
-static void x264_deblock_v_luma_mmx2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+static void x264_deblock_v_luma_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      x264_deblock_v8_luma_mmx2( pix,   stride, alpha, beta, tc0   );
      x264_deblock_v8_luma_mmx2( pix+8, stride, alpha, beta, tc0+2 );
  }
-static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, int stride, int alpha, int beta )
+static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, intptr_t stride, int alpha, int beta )
  {
      x264_deblock_v8_luma_intra_mmx2( pix,   stride, alpha, beta );
      x264_deblock_v8_luma_intra_mmx2( pix+8, stride, alpha, beta );
@@ -703,15 +705,15 @@ static void x264_deblock_v_luma_intra_mmx2( uint8_t *pix, int stride, int alpha,
  #endif
  
  #if ARCH_PPC
-void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
-void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
  #endif // ARCH_PPC
  
  #if HAVE_ARMV6
-void x264_deblock_v_luma_neon( uint8_t *, int, int, int, int8_t * );
-void x264_deblock_h_luma_neon( uint8_t *, int, int, int, int8_t * );
-void x264_deblock_v_chroma_neon( uint8_t *, int, int, int, int8_t * );
-void x264_deblock_h_chroma_neon( uint8_t *, int, int, int, int8_t * );
+void x264_deblock_v_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_neon  ( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_v_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
  #endif
  
  void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
diff --git a/common/frame.c b/common/frame.c

index e13e509714155932b1cc64cf399946ea7937fb9d..21d13476489fc26797c0b9d1c9d7629922a7b1e0 100644 (file)
--- a/common/frame.c
+++ b/common/frame.c
@@ -710,8 +710,8 @@ x264_frame_t *x264_frame_pop_blank_unused( x264_t *h )
      return frame;
  }
  
-void x264_weight_scale_plane( x264_t *h, pixel *dst, int i_dst_stride, pixel *src, int i_src_stride,
-                         int i_width, int i_height, x264_weight_t *w )
+void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
+                              int i_width, int i_height, x264_weight_t *w )
  {
      /* Weight horizontal strips of height 16. This was found to be the optimal height
       * in terms of the cache loads. */
diff --git a/common/frame.h b/common/frame.h

index 94e875d2597499e8dd76f2a11860887295465439..54415f7f47d9f5aa8cf88bb4271d66a9f8f9bf0b 100644 (file)
--- a/common/frame.h
+++ b/common/frame.h
@@ -178,8 +178,8 @@ typedef struct
     x264_pthread_cond_t      cv_empty; /* event signaling that the list became emptier */
  } x264_sync_frame_list_t;
  
-typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
-typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
+typedef void (*x264_deblock_inter_t)( pixel *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+typedef void (*x264_deblock_intra_t)( pixel *pix, intptr_t stride, int alpha, int beta );
  typedef struct
  {
      x264_deblock_inter_t deblock_luma[2];
@@ -232,7 +232,7 @@ x264_frame_t *x264_frame_shift( x264_frame_t **list );
  void          x264_frame_push_unused( x264_t *h, x264_frame_t *frame );
  void          x264_frame_push_blank_unused( x264_t *h, x264_frame_t *frame );
  x264_frame_t *x264_frame_pop_blank_unused( x264_t *h );
-void x264_weight_scale_plane( x264_t *h, pixel *dst, int i_dst_stride, pixel *src, int i_src_stride,
+void x264_weight_scale_plane( x264_t *h, pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
                                int i_width, int i_height, x264_weight_t *w );
  x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
  void          x264_frame_delete_list( x264_frame_t **list );
diff --git a/common/macroblock.c b/common/macroblock.c

index d600f82ddfab5282539a18bb2ca2377909ce2478..b4e6d951b0d1fa31a9aeaa1c48f36ee00316d921 100644 (file)
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -121,7 +121,7 @@ static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int
      int mvy0   = x264_clip3( h->mb.cache.mv[0][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
      int mvy1   = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] ) + 4*4*y;
      int i_mode = x264_size2pixel[height][width];
-    int i_stride0 = 16, i_stride1 = 16;
+    intptr_t i_stride0 = 16, i_stride1 = 16;
      ALIGNED_ARRAY_16( pixel, tmp0,[16*16] );
      ALIGNED_ARRAY_16( pixel, tmp1,[16*16] );
      pixel *src0, *src1;
diff --git a/common/mc.c b/common/mc.c

index 88ed6ea8e98f755dbb6b06fdfbd0c340efb1a30b..86f7e35a416af22a384cb18c6d58006155615796 100644 (file)
--- a/common/mc.c
+++ b/common/mc.c
@@ -37,10 +37,9 @@
  #endif
  
  
-static inline void pixel_avg( pixel *dst,  int i_dst_stride,
-                              pixel *src1, int i_src1_stride,
-                              pixel *src2, int i_src2_stride,
-                              int i_width, int i_height )
+static inline void pixel_avg( pixel *dst,  intptr_t i_dst_stride,
+                              pixel *src1, intptr_t i_src1_stride,
+                              pixel *src2, intptr_t i_src2_stride, int i_width, int i_height )
  {
      for( int y = 0; y < i_height; y++ )
      {
@@ -52,7 +51,9 @@ static inline void pixel_avg( pixel *dst,  int i_dst_stride,
      }
  }
  
-static inline void pixel_avg_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height )
+static inline void pixel_avg_wxh( pixel *dst,  intptr_t i_dst,
+                                  pixel *src1, intptr_t i_src1,
+                                  pixel *src2, intptr_t i_src2, int width, int height )
  {
      for( int y = 0; y < height; y++ )
      {
@@ -66,9 +67,11 @@ static inline void pixel_avg_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1
  
  /* Implicit weighted bipred only:
   * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
-static inline void pixel_avg_weight_wxh( pixel *dst, int i_dst, pixel *src1, int i_src1, pixel *src2, int i_src2, int width, int height, int i_weight1 )
+static inline void pixel_avg_weight_wxh( pixel *dst,  intptr_t i_dst,
+                                         pixel *src1, intptr_t i_src1,
+                                         pixel *src2, intptr_t i_src2, int width, int height, int i_weight1 )
  {
-    const int i_weight2 = 64 - i_weight1;
+    int i_weight2 = 64 - i_weight1;
      for( int y = 0; y<height; y++, dst += i_dst, src1 += i_src1, src2 += i_src2 )
          for( int x = 0; x<width; x++ )
              dst[x] = x264_clip_pixel( (src1[x]*i_weight1 + src2[x]*i_weight2 + (1<<5)) >> 6 );
@@ -76,9 +79,9 @@ static inline void pixel_avg_weight_wxh( pixel *dst, int i_dst, pixel *src1, int
  #undef op_scale2
  
  #define PIXEL_AVG_C( name, width, height ) \
-static void name( pixel *pix1, int i_stride_pix1, \
-                  pixel *pix2, int i_stride_pix2, \
-                  pixel *pix3, int i_stride_pix3, int weight ) \
+static void name( pixel *pix1, intptr_t i_stride_pix1, \
+                  pixel *pix2, intptr_t i_stride_pix2, \
+                  pixel *pix3, intptr_t i_stride_pix3, int weight ) \
  { \
      if( weight == 32 ) \
          pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
@@ -104,7 +107,8 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
  }
  #define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
  #define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
-static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
+static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride,
+                       const x264_weight_t *weight, int i_width, int i_height )
  {
      int offset = weight->i_offset << (BIT_DEPTH-8);
      int scale = weight->i_scale;
@@ -124,7 +128,7 @@ static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_strid
  }
  
  #define MC_WEIGHT_C( name, width ) \
-    static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
+    static void name( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, const x264_weight_t *weight, int height ) \
  { \
      mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
  }
@@ -146,7 +150,7 @@ static weight_fn_t x264_mc_weight_wtab[6] =
      mc_weight_w20,
  };
  const x264_weight_t x264_weight_none[3] = { {{0}} };
-static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride, int i_width, int i_height )
+static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
  {
      for( int y = 0; y < i_height; y++ )
      {
@@ -159,7 +163,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride,
  
  #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
  static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
-                         int stride, int width, int height, int16_t *buf )
+                         intptr_t stride, int width, int height, int16_t *buf )
  {
      const int pad = (BIT_DEPTH > 9) ? (-10 * PIXEL_MAX) : 0;
      for( int y = 0; y < height; y++ )
@@ -185,8 +189,8 @@ static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
  static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  
-static void mc_luma( pixel *dst,    int i_dst_stride,
-                     pixel *src[4], int i_src_stride,
+static void mc_luma( pixel *dst,    intptr_t i_dst_stride,
+                     pixel *src[4], intptr_t i_src_stride,
                       int mvx, int mvy,
                       int i_width, int i_height, const x264_weight_t *weight )
  {
@@ -208,8 +212,8 @@ static void mc_luma( pixel *dst,    int i_dst_stride,
          mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
  }
  
-static pixel *get_ref( pixel *dst,   int *i_dst_stride,
-                       pixel *src[4], int i_src_stride,
+static pixel *get_ref( pixel *dst,   intptr_t *i_dst_stride,
+                       pixel *src[4], intptr_t i_src_stride,
                         int mvx, int mvy,
                         int i_width, int i_height, const x264_weight_t *weight )
  {
@@ -239,8 +243,8 @@ static pixel *get_ref( pixel *dst,   int *i_dst_stride,
  }
  
  /* full chroma mc (ie until 1/8 pixel)*/
-static void mc_chroma( pixel *dstu, pixel *dstv, int i_dst_stride,
-                       pixel *src, int i_src_stride,
+static void mc_chroma( pixel *dstu, pixel *dstv, intptr_t i_dst_stride,
+                       pixel *src, intptr_t i_src_stride,
                         int mvx, int mvy,
                         int i_width, int i_height )
  {
@@ -273,7 +277,7 @@ static void mc_chroma( pixel *dstu, pixel *dstv, int i_dst_stride,
  }
  
  #define MC_COPY(W) \
-static void mc_copy_w##W( pixel *dst, int i_dst, pixel *src, int i_src, int i_height ) \
+static void mc_copy_w##W( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int i_height ) \
  { \
      mc_copy( src, i_src, dst, i_dst, W, i_height ); \
  }
@@ -281,8 +285,8 @@ MC_COPY( 16 )
  MC_COPY( 8 )
  MC_COPY( 4 )
  
-void x264_plane_copy_c( pixel *dst, int i_dst,
-                        pixel *src, int i_src, int w, int h )
+void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
+                        pixel *src, intptr_t i_src, int w, int h )
  {
      while( h-- )
      {
@@ -292,9 +296,9 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
      }
  }
  
-void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
-                                   pixel *srcu, int i_srcu,
-                                   pixel *srcv, int i_srcv, int w, int h )
+void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
+                                   pixel *srcu, intptr_t i_srcu,
+                                   pixel *srcv, intptr_t i_srcv, int w, int h )
  {
      for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
          for( int x=0; x<w; x++ )
@@ -304,9 +308,9 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
          }
  }
  
-static void x264_plane_copy_deinterleave_c( pixel *dstu, int i_dstu,
-                                            pixel *dstv, int i_dstv,
-                                            pixel *src, int i_src, int w, int h )
+static void x264_plane_copy_deinterleave_c( pixel *dstu, intptr_t i_dstu,
+                                            pixel *dstv, intptr_t i_dstv,
+                                            pixel *src,  intptr_t i_src, int w, int h )
  {
      for( int y=0; y<h; y++, dstu+=i_dstu, dstv+=i_dstv, src+=i_src )
          for( int x=0; x<w; x++ )
@@ -316,10 +320,10 @@ static void x264_plane_copy_deinterleave_c( pixel *dstu, int i_dstu,
          }
  }
  
-static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, int i_dsta,
-                                                pixel *dstb, int i_dstb,
-                                                pixel *dstc, int i_dstc,
-                                                pixel *src, int i_src, int pw, int w, int h )
+static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, intptr_t i_dsta,
+                                                pixel *dstb, intptr_t i_dstb,
+                                                pixel *dstc, intptr_t i_dstc,
+                                                pixel *src,  intptr_t i_src, int pw, int w, int h )
  {
      for( int y=0; y<h; y++, dsta+=i_dsta, dstb+=i_dstb, dstc+=i_dstc, src+=i_src )
      {
@@ -332,7 +336,7 @@ static void x264_plane_copy_deinterleave_rgb_c( pixel *dsta, int i_dsta,
      }
  }
  
-static void store_interleave_chroma( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height )
+static void store_interleave_chroma( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height )
  {
      for( int y=0; y<height; y++, dst+=i_dst, srcu+=FDEC_STRIDE, srcv+=FDEC_STRIDE )
          for( int x=0; x<8; x++ )
@@ -342,29 +346,29 @@ static void store_interleave_chroma( pixel *dst, int i_dst, pixel *srcu, pixel *
          }
  }
  
-static void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, int i_src, int height )
+static void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, intptr_t i_src, int height )
  {
      x264_plane_copy_deinterleave_c( dst, FENC_STRIDE, dst+FENC_STRIDE/2, FENC_STRIDE, src, i_src, 8, height );
  }
  
-static void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, int i_src, int height )
+static void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, intptr_t i_src, int height )
  {
      x264_plane_copy_deinterleave_c( dst, FDEC_STRIDE, dst+FDEC_STRIDE/2, FDEC_STRIDE, src, i_src, 8, height );
  }
  
-static void prefetch_fenc_null( pixel *pix_y, int stride_y,
-                                pixel *pix_uv, int stride_uv, int mb_x )
+static void prefetch_fenc_null( pixel *pix_y,  intptr_t stride_y,
+                                pixel *pix_uv, intptr_t stride_uv, int mb_x )
  {}
  
-static void prefetch_ref_null( pixel *pix, int stride, int parity )
+static void prefetch_ref_null( pixel *pix, intptr_t stride, int parity )
  {}
  
-static void memzero_aligned( void * dst, int n )
+static void memzero_aligned( void * dst, size_t n )
  {
      memset( dst, 0, n );
  }
  
-static void integral_init4h( uint16_t *sum, pixel *pix, int stride )
+static void integral_init4h( uint16_t *sum, pixel *pix, intptr_t stride )
  {
      int v = pix[0]+pix[1]+pix[2]+pix[3];
      for( int x = 0; x < stride-4; x++ )
@@ -374,7 +378,7 @@ static void integral_init4h( uint16_t *sum, pixel *pix, int stride )
      }
  }
  
-static void integral_init8h( uint16_t *sum, pixel *pix, int stride )
+static void integral_init8h( uint16_t *sum, pixel *pix, intptr_t stride )
  {
      int v = pix[0]+pix[1]+pix[2]+pix[3]+pix[4]+pix[5]+pix[6]+pix[7];
      for( int x = 0; x < stride-8; x++ )
@@ -384,7 +388,7 @@ static void integral_init8h( uint16_t *sum, pixel *pix, int stride )
      }
  }
  
-static void integral_init4v( uint16_t *sum8, uint16_t *sum4, int stride )
+static void integral_init4v( uint16_t *sum8, uint16_t *sum4, intptr_t stride )
  {
      for( int x = 0; x < stride-8; x++ )
          sum4[x] = sum8[x+4*stride] - sum8[x];
@@ -392,7 +396,7 @@ static void integral_init4v( uint16_t *sum8, uint16_t *sum4, int stride )
          sum8[x] = sum8[x+8*stride] + sum8[x+8*stride+4] - sum8[x] - sum8[x+4];
  }
  
-static void integral_init8v( uint16_t *sum8, int stride )
+static void integral_init8v( uint16_t *sum8, intptr_t stride )
  {
      for( int x = 0; x < stride-8; x++ )
          sum8[x] = sum8[x+8*stride] - sum8[x];
@@ -425,7 +429,7 @@ void x264_frame_init_lowres( x264_t *h, x264_frame_t *frame )
  }
  
  static void frame_init_lowres_core( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
-                                    int src_stride, int dst_stride, int width, int height )
+                                    intptr_t src_stride, intptr_t dst_stride, int width, int height )
  {
      for( int y = 0; y < height; y++ )
      {
diff --git a/common/mc.h b/common/mc.h

index 424bcee3fc0cb8dfae02e914c008fca7b0fea82d..8d0f77272df6ff9daeec23c3bb4f2d19729c9ccc 100644 (file)
--- a/common/mc.h
+++ b/common/mc.h
@@ -27,7 +27,7 @@
  #define X264_MC_H
  
  struct x264_weight_t;
-typedef void (* weight_fn_t)( pixel *, int, pixel *,int, const struct x264_weight_t *, int );
+typedef void (* weight_fn_t)( pixel *, intptr_t, pixel *,intptr_t, const struct x264_weight_t *, int );
  typedef struct x264_weight_t
  {
      /* aligning the first member is a gcc hack to force the struct to be
@@ -62,65 +62,58 @@ extern const x264_weight_t x264_weight_none[3];
  
  typedef struct
  {
-    void (*mc_luma)( pixel *dst, int i_dst, pixel **src, int i_src,
+    void (*mc_luma)( pixel *dst, intptr_t i_dst, pixel **src, intptr_t i_src,
                       int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight );
  
      /* may round up the dimensions if they're not a power of 2 */
-    pixel* (*get_ref)( pixel *dst, int *i_dst, pixel **src, int i_src,
+    pixel* (*get_ref)( pixel *dst, intptr_t *i_dst, pixel **src, intptr_t i_src,
                         int mvx, int mvy, int i_width, int i_height, const x264_weight_t *weight );
  
      /* mc_chroma may write up to 2 bytes of garbage to the right of dst,
       * so it must be run from left to right. */
-    void (*mc_chroma)( pixel *dstu, pixel *dstv, int i_dst, pixel *src, int i_src,
+    void (*mc_chroma)( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,
                         int mvx, int mvy, int i_width, int i_height );
  
-    void (*avg[12])( pixel *dst, int, pixel *src1, int, pixel *src2, int, int i_weight );
+    void (*avg[12])( pixel *dst,  intptr_t dst_stride, pixel *src1, intptr_t src1_stride,
+                     pixel *src2, intptr_t src2_stride, int i_weight );
  
      /* only 16x16, 8x8, and 4x4 defined */
-    void (*copy[7])( pixel *dst, int, pixel *src, int, int i_height );
-    void (*copy_16x16_unaligned)( pixel *dst, int, pixel *src, int, int i_height );
-
-    void (*store_interleave_chroma)( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
-    void (*load_deinterleave_chroma_fenc)( pixel *dst, pixel *src, int i_src, int height );
-    void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, int i_src, int height );
-
-    void (*plane_copy)( pixel *dst, int i_dst,
-                        pixel *src, int i_src, int w, int h );
-    void (*plane_copy_interleave)( pixel *dst, int i_dst,
-                                   pixel *srcu, int i_srcu,
-                                   pixel *srcv, int i_srcv, int w, int h );
+    void (*copy[7])( pixel *dst, intptr_t dst_stride, pixel *src, intptr_t src_stride, int i_height );
+    void (*copy_16x16_unaligned)( pixel *dst, intptr_t dst_stride, pixel *src, intptr_t src_stride, int i_height );
+
+    void (*store_interleave_chroma)( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+    void (*load_deinterleave_chroma_fenc)( pixel *dst, pixel *src, intptr_t i_src, int height );
+    void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
+
+    void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
+    void (*plane_copy_interleave)( pixel *dst,  intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
+                                   pixel *srcv, intptr_t i_srcv, int w, int h );
      /* may write up to 15 pixels off the end of each plane */
-    void (*plane_copy_deinterleave)( pixel *dstu, int i_dstu,
-                                     pixel *dstv, int i_dstv,
-                                     pixel *src, int i_src, int w, int h );
-    void (*plane_copy_deinterleave_rgb)( pixel *dsta, int i_dsta,
-                                         pixel *dstb, int i_dstb,
-                                         pixel *dstc, int i_dstc,
-                                         pixel *src, int i_src, int pw, int w, int h );
+    void (*plane_copy_deinterleave)( pixel *dstu, intptr_t i_dstu, pixel *dstv, intptr_t i_dstv,
+                                     pixel *src,  intptr_t i_src, int w, int h );
+    void (*plane_copy_deinterleave_rgb)( pixel *dsta, intptr_t i_dsta, pixel *dstb, intptr_t i_dstb,
+                                         pixel *dstc, intptr_t i_dstc, pixel *src,  intptr_t i_src, int pw, int w, int h );
      void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
-                         int i_stride, int i_width, int i_height, int16_t *buf );
+                         intptr_t i_stride, int i_width, int i_height, int16_t *buf );
  
      /* prefetch the next few macroblocks of fenc or fdec */
-    void (*prefetch_fenc)( pixel *pix_y, int stride_y,
-                           pixel *pix_uv, int stride_uv, int mb_x );
-    void (*prefetch_fenc_420)( pixel *pix_y, int stride_y,
-                               pixel *pix_uv, int stride_uv, int mb_x );
-    void (*prefetch_fenc_422)( pixel *pix_y, int stride_y,
-                               pixel *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc)    ( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
+    void (*prefetch_fenc_420)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
+    void (*prefetch_fenc_422)( pixel *pix_y, intptr_t stride_y, pixel *pix_uv, intptr_t stride_uv, int mb_x );
      /* prefetch the next few macroblocks of a hpel reference frame */
-    void (*prefetch_ref)( pixel *pix, int stride, int parity );
+    void (*prefetch_ref)( pixel *pix, intptr_t stride, int parity );
  
      void *(*memcpy_aligned)( void *dst, const void *src, size_t n );
-    void (*memzero_aligned)( void *dst, int n );
+    void (*memzero_aligned)( void *dst, size_t n );
  
      /* successive elimination prefilter */
-    void (*integral_init4h)( uint16_t *sum, pixel *pix, int stride );
-    void (*integral_init8h)( uint16_t *sum, pixel *pix, int stride );
-    void (*integral_init4v)( uint16_t *sum8, uint16_t *sum4, int stride );
-    void (*integral_init8v)( uint16_t *sum8, int stride );
+    void (*integral_init4h)( uint16_t *sum, pixel *pix, intptr_t stride );
+    void (*integral_init8h)( uint16_t *sum, pixel *pix, intptr_t stride );
+    void (*integral_init4v)( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+    void (*integral_init8v)( uint16_t *sum8, intptr_t stride );
  
      void (*frame_init_lowres_core)( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,
-                                    int src_stride, int dst_stride, int width, int height );
+                                    intptr_t src_stride, intptr_t dst_stride, int width, int height );
      weight_fn_t *weight;
      weight_fn_t *offsetadd;
      weight_fn_t *offsetsub;
diff --git a/common/pixel.c b/common/pixel.c

index 2946ddcf67a3ba893d18b0673c8fbf05f807222c..03425b566135dc0a0993185395a51ca577f76f40 100644 (file)
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -46,8 +46,8 @@
   * pixel_sad_WxH
   ****************************************************************************/
  #define PIXEL_SAD_C( name, lx, ly ) \
-static int name( pixel *pix1, int i_stride_pix1,  \
-                 pixel *pix2, int i_stride_pix2 ) \
+static int name( pixel *pix1, intptr_t i_stride_pix1,  \
+                 pixel *pix2, intptr_t i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      for( int y = 0; y < ly; y++ )                   \
@@ -76,8 +76,8 @@ PIXEL_SAD_C( x264_pixel_sad_4x4,    4,  4 )
   * pixel_ssd_WxH
   ****************************************************************************/
  #define PIXEL_SSD_C( name, lx, ly ) \
-static int name( pixel *pix1, int i_stride_pix1,  \
-                 pixel *pix2, int i_stride_pix2 ) \
+static int name( pixel *pix1, intptr_t i_stride_pix1,  \
+                 pixel *pix2, intptr_t i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      for( int y = 0; y < ly; y++ )                   \
@@ -102,7 +102,8 @@ PIXEL_SSD_C( x264_pixel_ssd_4x16,   4, 16 )
  PIXEL_SSD_C( x264_pixel_ssd_4x8,    4,  8 )
  PIXEL_SSD_C( x264_pixel_ssd_4x4,    4,  4 )
  
-uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height )
+uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1,
+                             pixel *pix2, intptr_t i_pix2, int i_width, int i_height )
  {
      uint64_t i_ssd = 0;
      int y;
@@ -142,7 +143,8 @@ uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1,
      return i_ssd;
  }
  
-static void pixel_ssd_nv12_core( pixel *pixuv1, int stride1, pixel *pixuv2, int stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
+static void pixel_ssd_nv12_core( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2,
+                                 int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
  {
      *ssd_u = 0, *ssd_v = 0;
      for( int y = 0; y < height; y++, pixuv1+=stride1, pixuv2+=stride2 )
@@ -155,7 +157,8 @@ static void pixel_ssd_nv12_core( pixel *pixuv1, int stride1, pixel *pixuv2, int
          }
  }
  
-void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v )
+void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
+                          int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v )
  {
      pf->ssd_nv12_core( pix1, i_pix1, pix2, i_pix2, i_width&~7, i_height, ssd_u, ssd_v );
      if( i_width&7 )
@@ -171,7 +174,7 @@ void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pi
   * pixel_var_wxh
   ****************************************************************************/
  #define PIXEL_VAR_C( name, w, h ) \
-static uint64_t name( pixel *pix, int i_stride ) \
+static uint64_t name( pixel *pix, intptr_t i_stride ) \
  {                                             \
      uint32_t sum = 0, sqr = 0;                \
      for( int y = 0; y < h; y++ )              \
@@ -194,7 +197,7 @@ PIXEL_VAR_C( x264_pixel_var_8x8,    8,  8 )
   * pixel_var2_wxh
   ****************************************************************************/
  #define PIXEL_VAR2_C( name, w, h, shift ) \
-static int name( pixel *pix1, int i_stride1, pixel *pix2, int i_stride2, int *ssd ) \
+static int name( pixel *pix1, intptr_t i_stride1, pixel *pix2, intptr_t i_stride2, int *ssd ) \
  { \
      uint32_t var = 0, sum = 0, sqr = 0; \
      for( int y = 0; y < h; y++ ) \
@@ -249,7 +252,7 @@ static ALWAYS_INLINE sum2_t abs2( sum2_t a )
   * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
   ****************************************************************************/
  
-static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
+static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )
  {
      sum2_t tmp[4][2];
      sum2_t a0, a1, a2, a3, b0, b1;
@@ -274,7 +277,7 @@ static NOINLINE int x264_pixel_satd_4x4( pixel *pix1, int i_pix1, pixel *pix2, i
      return sum >> 1;
  }
  
-static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
+static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )
  {
      sum2_t tmp[4][4];
      sum2_t a0, a1, a2, a3;
@@ -296,7 +299,7 @@ static NOINLINE int x264_pixel_satd_8x4( pixel *pix1, int i_pix1, pixel *pix2, i
  }
  
  #define PIXEL_SATD_C( w, h, sub )\
-static int x264_pixel_satd_##w##x##h( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )\
+static int x264_pixel_satd_##w##x##h( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )\
  {\
      int sum = sub( pix1, i_pix1, pix2, i_pix2 )\
              + sub( pix1+4*i_pix1, i_pix1, pix2+4*i_pix2, i_pix2 );\
@@ -318,7 +321,7 @@ PIXEL_SATD_C( 8,  8,  x264_pixel_satd_8x4 )
  PIXEL_SATD_C( 4,  16, x264_pixel_satd_4x4 )
  PIXEL_SATD_C( 4,  8,  x264_pixel_satd_4x4 )
  
-static NOINLINE int sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
+static NOINLINE int sa8d_8x8( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )
  {
      sum2_t tmp[8][4];
      sum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
@@ -352,13 +355,13 @@ static NOINLINE int sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
      return sum;
  }
  
-static int x264_pixel_sa8d_8x8( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
+static int x264_pixel_sa8d_8x8( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )
  {
      int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 );
      return (sum+2)>>2;
  }
  
-static int x264_pixel_sa8d_16x16( pixel *pix1, int i_pix1, pixel *pix2, int i_pix2 )
+static int x264_pixel_sa8d_16x16( pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2 )
  {
      int sum = sa8d_8x8( pix1, i_pix1, pix2, i_pix2 )
              + sa8d_8x8( pix1+8, i_pix1, pix2+8, i_pix2 )
@@ -368,7 +371,7 @@ static int x264_pixel_sa8d_16x16( pixel *pix1, int i_pix1, pixel *pix2, int i_pi
  }
  
  
-static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, int stride )
+static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, intptr_t stride )
  {
      sum2_t tmp[32];
      sum2_t a0, a1, a2, a3, dc;
@@ -406,7 +409,7 @@ static NOINLINE uint64_t pixel_hadamard_ac( pixel *pix, int stride )
  }
  
  #define HADAMARD_AC(w,h) \
-static uint64_t x264_pixel_hadamard_ac_##w##x##h( pixel *pix, int stride )\
+static uint64_t x264_pixel_hadamard_ac_##w##x##h( pixel *pix, intptr_t stride )\
  {\
      uint64_t sum = pixel_hadamard_ac( pix, stride );\
      if( w==16 )\
@@ -427,13 +430,15 @@ HADAMARD_AC( 8, 8 )
   * pixel_sad_x4
   ****************************************************************************/
  #define SAD_X( size ) \
-static void x264_pixel_sad_x3_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
+static void x264_pixel_sad_x3_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2,\
+                                      intptr_t i_stride, int scores[3] )\
  {\
      scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
      scores[2] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix2, i_stride );\
  }\
-static void x264_pixel_sad_x4_##size( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
+static void x264_pixel_sad_x4_##size( pixel *fenc, pixel *pix0, pixel *pix1,pixel *pix2, pixel *pix3,\
+                                      intptr_t i_stride, int scores[4] )\
  {\
      scores[0] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_sad_##size( fenc, FENC_STRIDE, pix1, i_stride );\
@@ -464,13 +469,15 @@ SAD_X( 8x8_vis )
   ****************************************************************************/
  
  #define SATD_X( size, cpu ) \
-static void x264_pixel_satd_x3_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, int i_stride, int scores[3] )\
+static void x264_pixel_satd_x3_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2,\
+                                            intptr_t i_stride, int scores[3] )\
  {\
      scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
      scores[2] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix2, i_stride );\
  }\
-static void x264_pixel_satd_x4_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3, int i_stride, int scores[4] )\
+static void x264_pixel_satd_x4_##size##cpu( pixel *fenc, pixel *pix0, pixel *pix1, pixel *pix2, pixel *pix3,\
+                                            intptr_t i_stride, int scores[4] )\
  {\
      scores[0] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = x264_pixel_satd_##size##cpu( fenc, FENC_STRIDE, pix1, i_stride );\
@@ -576,9 +583,9 @@ INTRA_MBCMP(satd,  8x16, dc, h,  v, c, _xop, _mmx2 )
  /****************************************************************************
   * structural similarity metric
   ****************************************************************************/
-static void ssim_4x4x2_core( const pixel *pix1, int stride1,
-                             const pixel *pix2, int stride2,
-                             int sums[2][4])
+static void ssim_4x4x2_core( const pixel *pix1, intptr_t stride1,
+                             const pixel *pix2, intptr_t stride2,
+                             int sums[2][4] )
  {
      for( int z = 0; z < 2; z++ )
      {
@@ -640,8 +647,8 @@ static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
  }
  
  float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
-                           pixel *pix1, int stride1,
-                           pixel *pix2, int stride2,
+                           pixel *pix1, intptr_t stride1,
+                           pixel *pix2, intptr_t stride2,
                             int width, int height, void *buf, int *cnt )
  {
      int z = 0;
@@ -665,7 +672,7 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
      return ssim;
  }
  
-static int pixel_vsad( pixel *src, int stride, int height )
+static int pixel_vsad( pixel *src, intptr_t stride, int height )
  {
      int score = 0;
      for( int i = 1; i < height; i++, src += stride )
diff --git a/common/pixel.h b/common/pixel.h

index b39357268971c9193a6be55b51f84d9e8b54695f..50589137efd867a2dccdb864556a824114f7b740 100644 (file)
--- a/common/pixel.h
+++ b/common/pixel.h
@@ -30,9 +30,9 @@
  
  // SSD assumes all args aligned
  // other cmp functions assume first arg aligned
-typedef int  (*x264_pixel_cmp_t) ( pixel *, int, pixel *, int );
-typedef void (*x264_pixel_cmp_x3_t) ( pixel *, pixel *, pixel *, pixel *, int, int[3] );
-typedef void (*x264_pixel_cmp_x4_t) ( pixel *, pixel *, pixel *, pixel *, pixel *, int, int[4] );
+typedef int  (*x264_pixel_cmp_t) ( pixel *, intptr_t, pixel *, intptr_t );
+typedef void (*x264_pixel_cmp_x3_t) ( pixel *, pixel *, pixel *, pixel *, intptr_t, int[3] );
+typedef void (*x264_pixel_cmp_x4_t) ( pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int[4] );
  
  enum
  {
@@ -88,18 +88,18 @@ typedef struct
      x264_pixel_cmp_x3_t fpelcmp_x3[7];
      x264_pixel_cmp_x4_t fpelcmp_x4[7];
      x264_pixel_cmp_t sad_aligned[8]; /* Aligned SAD for mbcmp */
-    int (*vsad)( pixel *, int, int );
+    int (*vsad)( pixel *, intptr_t, int );
  
-    uint64_t (*var[4])( pixel *pix, int stride );
-    int (*var2[4])( pixel *pix1, int stride1,
-                    pixel *pix2, int stride2, int *ssd );
-    uint64_t (*hadamard_ac[4])( pixel *pix, int stride );
+    uint64_t (*var[4])( pixel *pix, intptr_t stride );
+    int (*var2[4])( pixel *pix1, intptr_t stride1,
+                    pixel *pix2, intptr_t stride2, int *ssd );
+    uint64_t (*hadamard_ac[4])( pixel *pix, intptr_t stride );
  
-    void (*ssd_nv12_core)( pixel *pixuv1, int stride1,
-                           pixel *pixuv2, int stride2, int width, int height,
+    void (*ssd_nv12_core)( pixel *pixuv1, intptr_t stride1,
+                           pixel *pixuv2, intptr_t stride2, int width, int height,
                             uint64_t *ssd_u, uint64_t *ssd_v );
-    void (*ssim_4x4x2_core)( const pixel *pix1, int stride1,
-                             const pixel *pix2, int stride2, int sums[2][4] );
+    void (*ssim_4x4x2_core)( const pixel *pix1, intptr_t stride1,
+                             const pixel *pix2, intptr_t stride2, int sums[2][4] );
      float (*ssim_end4)( int sum0[5][4], int sum1[5][4], int width );
  
      /* multiple parallel calls to cmp. */
@@ -143,9 +143,12 @@ typedef struct
  } x264_pixel_function_t;
  
  void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
-void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
-uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height );
-float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf, int *cnt );
+void x264_pixel_ssd_nv12   ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
+                             int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
+uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
+                             int i_width, int i_height );
+float x264_pixel_ssim_wxh  ( x264_pixel_function_t *pf, pixel *pix1, intptr_t i_pix1, pixel *pix2, intptr_t i_pix2,
+                             int i_width, int i_height, void *buf, int *cnt );
  int x264_field_vsad( x264_t *h, int mb_x, int mb_y );
  
  #endif
diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c

index a9e862e3b6f76b3e2c73d4660a692a1cf8b2f60a..dea872ba0a6f5c06f6692140ddd5455ff2595ff2 100644 (file)
--- a/common/ppc/deblock.c
+++ b/common/ppc/deblock.c
@@ -267,7 +267,7 @@ static inline vec_u8_t h264_deblock_q1( register vec_u8_t p0, register vec_u8_t
      q1 = newq1;                                                                              \
  }
  
-void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+void x264_deblock_v_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
      if( (tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0 )
      {
@@ -285,7 +285,7 @@ void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta,
      }
  }
  
-void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+void x264_deblock_h_luma_altivec( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  {
  
      register vec_u8_t line0, line1, line2, line3, line4, line5;
diff --git a/common/ppc/mc.c b/common/ppc/mc.c

index 0fc735c20171146c4e2c4409974ac3c3f9b711d1..2e720f47a8cd60cabf09b51a20ac910953c1a8da 100644 (file)
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -37,8 +37,8 @@
  #include "ppccommon.h"
  
  #if !HIGH_BIT_DEPTH
-typedef void (*pf_mc_t)( uint8_t *src, int i_src,
-                         uint8_t *dst, int i_dst, int i_height );
+typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src,
+                         uint8_t *dst, intptr_t i_dst, int i_height );
  
  
  static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
@@ -58,8 +58,8 @@ static inline int x264_tapfilter1( uint8_t *pix )
  }
  
  
-static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst,  int i_dst,
-                                               uint8_t *src1, int i_src1,
+static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst,  intptr_t i_dst,
+                                               uint8_t *src1, intptr_t i_src1,
                                                 uint8_t *src2, int i_height )
  {
      for( int y = 0; y < i_height; y++ )
@@ -72,8 +72,8 @@ static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst,  int i_dst,
      }
  }
  
-static inline void x264_pixel_avg2_w8_altivec( uint8_t *dst,  int i_dst,
-                                               uint8_t *src1, int i_src1,
+static inline void x264_pixel_avg2_w8_altivec( uint8_t *dst,  intptr_t i_dst,
+                                               uint8_t *src1, intptr_t i_src1,
                                                 uint8_t *src2, int i_height )
  {
      vec_u8_t src1v, src2v;
@@ -95,8 +95,8 @@ static inline void x264_pixel_avg2_w8_altivec( uint8_t *dst,  int i_dst,
      }
  }
  
-static inline void x264_pixel_avg2_w16_altivec( uint8_t *dst,  int i_dst,
-                                                uint8_t *src1, int i_src1,
+static inline void x264_pixel_avg2_w16_altivec( uint8_t *dst,  intptr_t i_dst,
+                                                uint8_t *src1, intptr_t i_src1,
                                                  uint8_t *src2, int i_height )
  {
      vec_u8_t src1v, src2v;
@@ -117,8 +117,8 @@ static inline void x264_pixel_avg2_w16_altivec( uint8_t *dst,  int i_dst,
      }
  }
  
-static inline void x264_pixel_avg2_w20_altivec( uint8_t *dst,  int i_dst,
-                                                uint8_t *src1, int i_src1,
+static inline void x264_pixel_avg2_w20_altivec( uint8_t *dst,  intptr_t i_dst,
+                                                uint8_t *src1, intptr_t i_src1,
                                                  uint8_t *src2, int i_height )
  {
      x264_pixel_avg2_w16_altivec(dst, i_dst, src1, i_src1, src2, i_height);
@@ -128,8 +128,8 @@ static inline void x264_pixel_avg2_w20_altivec( uint8_t *dst,  int i_dst,
  /* mc_copy: plain c */
  
  #define MC_COPY( name, a )                                \
-static void name( uint8_t *dst, int i_dst,                \
-                  uint8_t *src, int i_src, int i_height ) \
+static void name( uint8_t *dst, intptr_t i_dst,           \
+                  uint8_t *src, intptr_t i_src, int i_height ) \
  {                                                         \
      int y;                                                \
      for( y = 0; y < i_height; y++ )                       \
@@ -142,14 +142,14 @@ static void name( uint8_t *dst, int i_dst,                \
  MC_COPY( x264_mc_copy_w4_altivec,  4  )
  MC_COPY( x264_mc_copy_w8_altivec,  8  )
  
-static void x264_mc_copy_w16_altivec( uint8_t *dst, int i_dst,
-                                      uint8_t *src, int i_src, int i_height )
+static void x264_mc_copy_w16_altivec( uint8_t *dst, intptr_t i_dst,
+                                      uint8_t *src, intptr_t i_src, int i_height )
  {
      vec_u8_t cpyV;
      PREP_LOAD;
      PREP_LOAD_SRC( src );
  
-    for( int y = 0; y < i_height; y++)
+    for( int y = 0; y < i_height; y++ )
      {
          VEC_LOAD( src, cpyV, 16, vec_u8_t, src );
          vec_st(cpyV, 0, dst);
@@ -160,12 +160,12 @@ static void x264_mc_copy_w16_altivec( uint8_t *dst, int i_dst,
  }
  
  
-static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, int i_dst,
-                                              uint8_t *src, int i_src, int i_height )
+static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, intptr_t i_dst,
+                                              uint8_t *src, intptr_t i_src, int i_height )
  {
-    for( int y = 0; y < i_height; ++y)
+    for( int y = 0; y < i_height; ++y )
      {
-        vec_u8_t cpyV = vec_ld( 0, src);
+        vec_u8_t cpyV = vec_ld( 0, src );
          vec_st(cpyV, 0, dst);
  
          src += i_src;
@@ -174,13 +174,13 @@ static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, int i_dst,
  }
  
  
-static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
-                             uint8_t *src[4], int i_src_stride,
+static void mc_luma_altivec( uint8_t *dst,    intptr_t i_dst_stride,
+                             uint8_t *src[4], intptr_t i_src_stride,
                               int mvx, int mvy,
                               int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
+    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
      uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
      if( qpel_idx & 5 ) /* qpel interpolation needed */
      {
@@ -222,13 +222,13 @@ static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
  
  
  
-static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
-                                 uint8_t *src[4], int i_src_stride,
+static uint8_t *get_ref_altivec( uint8_t *dst,   intptr_t *i_dst_stride,
+                                 uint8_t *src[4], intptr_t i_src_stride,
                                   int mvx, int mvy,
                                   int i_width, int i_height, const x264_weight_t *weight )
  {
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);
-    int offset = (mvy>>2)*i_src_stride + (mvx>>2);
+    intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
      uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
      if( qpel_idx & 5 ) /* qpel interpolation needed */
      {
@@ -266,10 +266,9 @@ static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
      }
  }
  
-static void mc_chroma_2xh( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
-                           uint8_t *src, int i_src_stride,
-                           int mvx, int mvy,
-                           int i_height )
+static void mc_chroma_2xh( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
+                           uint8_t *src, intptr_t i_src_stride,
+                           int mvx, int mvy, int i_height )
  {
      uint8_t *srcp;
      int d8x = mvx&0x07;
@@ -297,10 +296,9 @@ static void mc_chroma_2xh( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
      }
   }
  
-static void mc_chroma_altivec_4xh( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
-                                   uint8_t *src, int i_src_stride,
-                                   int mvx, int mvy,
-                                   int i_height )
+static void mc_chroma_altivec_4xh( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
+                                   uint8_t *src, intptr_t i_src_stride,
+                                   int mvx, int mvy, int i_height )
  {
      uint8_t *srcp;
      int d8x = mvx & 0x07;
@@ -386,10 +384,9 @@ static void mc_chroma_altivec_4xh( uint8_t *dstu, uint8_t *dstv, int i_dst_strid
      }
  }
  
-static void mc_chroma_altivec_8xh( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
-                                   uint8_t *src, int i_src_stride,
-                                   int mvx, int mvy,
-                                   int i_height )
+static void mc_chroma_altivec_8xh( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
+                                   uint8_t *src, intptr_t i_src_stride,
+                                   int mvx, int mvy, int i_height )
  {
      uint8_t *srcp;
      int d8x = mvx & 0x07;
@@ -510,10 +507,9 @@ static void mc_chroma_altivec_8xh( uint8_t *dstu, uint8_t *dstv, int i_dst_strid
      }
  }
  
-static void mc_chroma_altivec( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
-                               uint8_t *src, int i_src_stride,
-                               int mvx, int mvy,
-                               int i_width, int i_height )
+static void mc_chroma_altivec( uint8_t *dstu, uint8_t *dstv, intptr_t i_dst_stride,
+                               uint8_t *src, intptr_t i_src_stride,
+                               int mvx, int mvy, int i_width, int i_height )
  {
      if( i_width == 8 )
          mc_chroma_altivec_8xh( dstu, dstv, i_dst_stride, src, i_src_stride,
@@ -670,7 +666,7 @@ static void mc_chroma_altivec( uint8_t *dstu, uint8_t *dstv, int i_dst_stride,
  }
  
  void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
-                               int i_stride, int i_width, int i_height, int16_t *buf )
+                               intptr_t i_stride, int i_width, int i_height, int16_t *buf )
  {
      vec_u8_t destv;
      vec_u8_t src1v, src2v, src3v, src4v, src5v, src6v;
@@ -765,7 +761,7 @@ void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint
  }
  
  static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
-                                           int src_stride, int dst_stride, int width, int height )
+                                            intptr_t src_stride, intptr_t dst_stride, int width, int height )
  {
      int w = width >> 4;
      int end = (width & 15);
@@ -857,7 +853,7 @@ static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_
      }
  }
  
-static void mc_weight_w2_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+static void mc_weight_w2_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src,
                                    const x264_weight_t *weight, int i_height )
  {
      LOAD_ZERO;
@@ -911,7 +907,7 @@ static void mc_weight_w2_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_s
          }
      }
  }
-static void mc_weight_w4_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+static void mc_weight_w4_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src,
                                    const x264_weight_t *weight, int i_height )
  {
      LOAD_ZERO;
@@ -965,7 +961,7 @@ static void mc_weight_w4_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_s
          }
      }
  }
-static void mc_weight_w8_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+static void mc_weight_w8_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src,
                                    const x264_weight_t *weight, int i_height )
  {
      LOAD_ZERO;
@@ -1020,7 +1016,7 @@ static void mc_weight_w8_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_s
          }
      }
  }
-static void mc_weight_w16_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+static void mc_weight_w16_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src,
                                     const x264_weight_t *weight, int i_height )
  {
      LOAD_ZERO;
@@ -1080,7 +1076,7 @@ static void mc_weight_w16_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_
          }
      }
  }
-static void mc_weight_w20_altivec( uint8_t *dst, int i_dst, uint8_t *src, int i_src,
+static void mc_weight_w20_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src, intptr_t i_src,
                                     const x264_weight_t *weight, int i_height )
  {
      LOAD_ZERO;
diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c

index b60bfaf086da0d1bcd6bde0b132783f1519ca92d..585bc1972655e342e045a9f8004bda2661dc572c 100644 (file)
--- a/common/ppc/pixel.c
+++ b/common/ppc/pixel.c
@@ -34,8 +34,8 @@
   **********************************************************************/
  
  #define PIXEL_SAD_ALTIVEC( name, lx, ly, a, b )        \
-static int name( uint8_t *pix1, int i_pix1,            \
-                 uint8_t *pix2, int i_pix2 )           \
+static int name( uint8_t *pix1, intptr_t i_pix1,       \
+                 uint8_t *pix2, intptr_t i_pix2 )      \
  {                                                      \
      ALIGNED_16( int sum );                             \
                                                         \
@@ -119,8 +119,8 @@ PIXEL_SAD_ALTIVEC( pixel_sad_8x8_altivec,   8,  8,  2s, 1 )
  /***********************************************************************
   * SATD 4x4
   **********************************************************************/
-static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
-                                   uint8_t *pix2, int i_pix2 )
+static int pixel_satd_4x4_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                   uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -164,8 +164,8 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 4x8
   **********************************************************************/
-static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
-                                   uint8_t *pix2, int i_pix2 )
+static int pixel_satd_4x8_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                   uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -218,8 +218,8 @@ static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 8x4
   **********************************************************************/
-static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
-                                   uint8_t *pix2, int i_pix2 )
+static int pixel_satd_8x4_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                   uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -272,8 +272,8 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 8x8
   **********************************************************************/
-static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
-                                   uint8_t *pix2, int i_pix2 )
+static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                   uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -332,8 +332,8 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 8x16
   **********************************************************************/
-static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
-                                    uint8_t *pix2, int i_pix2 )
+static int pixel_satd_8x16_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                    uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -416,8 +416,8 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 16x8
   **********************************************************************/
-static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
-                                    uint8_t *pix2, int i_pix2 )
+static int pixel_satd_16x8_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                    uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -500,8 +500,8 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
  /***********************************************************************
   * SATD 16x16
   **********************************************************************/
-static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
-                                     uint8_t *pix2, int i_pix2 )
+static int pixel_satd_16x16_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                     uint8_t *pix2, intptr_t i_pix2 )
  {
      ALIGNED_16( int i_satd );
  
@@ -632,7 +632,7 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
  static void pixel_sad_x4_16x16_altivec( uint8_t *fenc,
                                          uint8_t *pix0, uint8_t *pix1,
                                          uint8_t *pix2, uint8_t *pix3,
-                                        int i_stride, int scores[4] )
+                                        intptr_t i_stride, int scores[4] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -744,7 +744,7 @@ static void pixel_sad_x4_16x16_altivec( uint8_t *fenc,
  
  static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0,
                                          uint8_t *pix1, uint8_t *pix2,
-                                        int i_stride, int scores[3] )
+                                        intptr_t i_stride, int scores[3] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -834,7 +834,8 @@ static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0,
      scores[2] = sum2;
  }
  
-static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
+static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2,
+                                       uint8_t *pix3, intptr_t i_stride, int scores[4] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -945,7 +946,7 @@ static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
  
  static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0,
                                         uint8_t *pix1, uint8_t *pix2,
-                                       int i_stride, int scores[3] )
+                                       intptr_t i_stride, int scores[3] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -1038,7 +1039,7 @@ static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0,
  static void pixel_sad_x4_8x16_altivec( uint8_t *fenc,
                                         uint8_t *pix0, uint8_t *pix1,
                                         uint8_t *pix2, uint8_t *pix3,
-                                       int i_stride, int scores[4] )
+                                       intptr_t i_stride, int scores[4] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -1152,7 +1153,7 @@ static void pixel_sad_x4_8x16_altivec( uint8_t *fenc,
  
  static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0,
                                         uint8_t *pix1, uint8_t *pix2,
-                                       int i_stride, int scores[3] )
+                                       intptr_t i_stride, int scores[3] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -1247,7 +1248,7 @@ static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0,
  static void pixel_sad_x4_8x8_altivec( uint8_t *fenc,
                                        uint8_t *pix0, uint8_t *pix1,
                                        uint8_t *pix2, uint8_t *pix3,
-                                      int i_stride, int scores[4] )
+                                      intptr_t i_stride, int scores[4] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -1361,7 +1362,7 @@ static void pixel_sad_x4_8x8_altivec( uint8_t *fenc,
  
  static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
                                        uint8_t *pix1, uint8_t *pix2,
-                                      int i_stride, int scores[3] )
+                                      intptr_t i_stride, int scores[3] )
  {
      ALIGNED_16( int sum0 );
      ALIGNED_16( int sum1 );
@@ -1457,8 +1458,8 @@ static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0,
  * SSD routines
  **********************************************************************/
  
-static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
-                                     uint8_t *pix2, int i_stride_pix2)
+static int pixel_ssd_16x16_altivec ( uint8_t *pix1, intptr_t i_stride_pix1,
+                                     uint8_t *pix2, intptr_t i_stride_pix2 )
  {
      ALIGNED_16( int sum );
  
@@ -1536,8 +1537,8 @@ static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
      return sum;
  }
  
-static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
-                                   uint8_t *pix2, int i_stride_pix2)
+static int pixel_ssd_8x8_altivec ( uint8_t *pix1, intptr_t i_stride_pix1,
+                                   uint8_t *pix2, intptr_t i_stride_pix2 )
  {
      ALIGNED_16( int sum );
  
@@ -1588,7 +1589,7 @@ static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
  /****************************************************************************
   * variance
   ****************************************************************************/
-static uint64_t x264_pixel_var_16x16_altivec( uint8_t *pix, int i_stride )
+static uint64_t x264_pixel_var_16x16_altivec( uint8_t *pix, intptr_t i_stride )
  {
      ALIGNED_16(uint32_t sum_tab[4]);
      ALIGNED_16(uint32_t sqr_tab[4]);
@@ -1615,7 +1616,7 @@ static uint64_t x264_pixel_var_16x16_altivec( uint8_t *pix, int i_stride )
      return sum + ((uint64_t)sqr<<32);
  }
  
-static uint64_t x264_pixel_var_8x8_altivec( uint8_t *pix, int i_stride )
+static uint64_t x264_pixel_var_8x8_altivec( uint8_t *pix, intptr_t i_stride )
  {
      ALIGNED_16(uint32_t sum_tab[4]);
      ALIGNED_16(uint32_t sqr_tab[4]);
@@ -1713,8 +1714,8 @@ static uint64_t x264_pixel_var_8x8_altivec( uint8_t *pix, int i_stride )
      sa8d7v = vec_sub(b6v, b7v);                           \
  }
  
-static int pixel_sa8d_8x8_core_altivec( uint8_t *pix1, int i_pix1,
-                                        uint8_t *pix2, int i_pix2 )
+static int pixel_sa8d_8x8_core_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                        uint8_t *pix2, intptr_t i_pix2 )
  {
      int32_t i_satd=0;
  
@@ -1781,21 +1782,21 @@ static int pixel_sa8d_8x8_core_altivec( uint8_t *pix1, int i_pix1,
      return i_satd;
  }
  
-static int pixel_sa8d_8x8_altivec( uint8_t *pix1, int i_pix1,
-                                   uint8_t *pix2, int i_pix2 )
+static int pixel_sa8d_8x8_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                   uint8_t *pix2, intptr_t i_pix2 )
  {
      int32_t i_satd;
      i_satd = (pixel_sa8d_8x8_core_altivec( pix1, i_pix1, pix2, i_pix2 )+2)>>2;
      return i_satd;
  }
  
-static int pixel_sa8d_16x16_altivec( uint8_t *pix1, int i_pix1,
-                                     uint8_t *pix2, int i_pix2 )
+static int pixel_sa8d_16x16_altivec( uint8_t *pix1, intptr_t i_pix1,
+                                     uint8_t *pix2, intptr_t i_pix2 )
  {
      int32_t i_satd;
  
-    i_satd = (pixel_sa8d_8x8_core_altivec( &pix1[0],     i_pix1, &pix2[0],     i_pix2 )
-            + pixel_sa8d_8x8_core_altivec( &pix1[8],     i_pix1, &pix2[8],     i_pix2 )
+    i_satd = (pixel_sa8d_8x8_core_altivec( &pix1[0],          i_pix1, &pix2[0],          i_pix2 )
+            + pixel_sa8d_8x8_core_altivec( &pix1[8],          i_pix1, &pix2[8],          i_pix2 )
              + pixel_sa8d_8x8_core_altivec( &pix1[8*i_pix1],   i_pix1, &pix2[8*i_pix2],   i_pix2 )
              + pixel_sa8d_8x8_core_altivec( &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 ) +2)>>2;
      return i_satd;
@@ -1817,7 +1818,7 @@ static int pixel_sa8d_16x16_altivec( uint8_t *pix1, int i_pix1,
      vec_s16_t pix16_s##num = (vec_s16_t)vec_perm(pix8_##num, zero_u8v, perm); \
      vec_s16_t pix16_d##num;
  
-static uint64_t pixel_hadamard_ac_altivec( uint8_t *pix, int stride, const vec_u8_t perm )
+static uint64_t pixel_hadamard_ac_altivec( uint8_t *pix, intptr_t stride, const vec_u8_t perm )
  {
      ALIGNED_16( int32_t sum4_tab[4] );
      ALIGNED_16( int32_t sum8_tab[4] );
@@ -1903,7 +1904,7 @@ static const vec_u8_t hadamard_permtab[] =
         0x1C,0x0C,0x1D,0x0D, 0x1E,0x0E,0x1F,0x0F )
   };
  
-static uint64_t x264_pixel_hadamard_ac_16x16_altivec( uint8_t *pix, int stride )
+static uint64_t x264_pixel_hadamard_ac_16x16_altivec( uint8_t *pix, intptr_t stride )
  {
      int idx =  ((uintptr_t)pix & 8) >> 3;
      vec_u8_t permh = hadamard_permtab[idx];
@@ -1915,7 +1916,7 @@ static uint64_t x264_pixel_hadamard_ac_16x16_altivec( uint8_t *pix, int stride )
      return ((sum>>34)<<32) + ((uint32_t)sum>>1);
  }
  
-static uint64_t x264_pixel_hadamard_ac_16x8_altivec( uint8_t *pix, int stride )
+static uint64_t x264_pixel_hadamard_ac_16x8_altivec( uint8_t *pix, intptr_t stride )
  {
      int idx =  ((uintptr_t)pix & 8) >> 3;
      vec_u8_t permh = hadamard_permtab[idx];
@@ -1925,7 +1926,7 @@ static uint64_t x264_pixel_hadamard_ac_16x8_altivec( uint8_t *pix, int stride )
      return ((sum>>34)<<32) + ((uint32_t)sum>>1);
  }
  
-static uint64_t x264_pixel_hadamard_ac_8x16_altivec( uint8_t *pix, int stride )
+static uint64_t x264_pixel_hadamard_ac_8x16_altivec( uint8_t *pix, intptr_t stride )
  {
      vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ];
      uint64_t sum = pixel_hadamard_ac_altivec( pix, stride, perm );
@@ -1933,7 +1934,7 @@ static uint64_t x264_pixel_hadamard_ac_8x16_altivec( uint8_t *pix, int stride )
      return ((sum>>34)<<32) + ((uint32_t)sum>>1);
  }
  
-static uint64_t x264_pixel_hadamard_ac_8x8_altivec( uint8_t *pix, int stride )
+static uint64_t x264_pixel_hadamard_ac_8x8_altivec( uint8_t *pix, intptr_t stride )
  {
      vec_u8_t perm = hadamard_permtab[ (((uintptr_t)pix & 8) >> 3) ];
      uint64_t sum = pixel_hadamard_ac_altivec( pix, stride, perm );
@@ -1944,8 +1945,8 @@ static uint64_t x264_pixel_hadamard_ac_8x8_altivec( uint8_t *pix, int stride )
  /****************************************************************************
   * structural similarity metric
   ****************************************************************************/
-static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
-                                     const uint8_t *pix2, int stride2,
+static void ssim_4x4x2_core_altivec( const uint8_t *pix1, intptr_t stride1,
+                                     const uint8_t *pix2, intptr_t stride2,
                                       int sums[2][4] )
  {
      ALIGNED_16( int temp[4] );
@@ -1986,13 +1987,15 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
  }
  
  #define SATD_X( size ) \
-static void pixel_satd_x3_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )\
+static void pixel_satd_x3_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2,\
+                                            intptr_t i_stride, int scores[3] )\
  {\
      scores[0] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix1, i_stride );\
      scores[2] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix2, i_stride );\
  }\
-static void pixel_satd_x4_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )\
+static void pixel_satd_x4_##size##_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2,\
+                                            uint8_t *pix3, intptr_t i_stride, int scores[4] )\
  {\
      scores[0] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix0, i_stride );\
      scores[1] = pixel_satd_##size##_altivec( fenc, FENC_STRIDE, pix1, i_stride );\
diff --git a/common/sparc/pixel.h b/common/sparc/pixel.h

index 32498db43b371e06715a6ad2e12321d048c98c87..0c762d4a2616b0222fa47be772285bdceffd4079 100644 (file)
--- a/common/sparc/pixel.h
+++ b/common/sparc/pixel.h
@@ -26,9 +26,9 @@
  #ifndef X264_SPARC_PIXEL_H
  #define X264_SPARC_PIXEL_H
  
-int x264_pixel_sad_8x8_vis( uint8_t *, int, uint8_t *, int );
-int x264_pixel_sad_8x16_vis( uint8_t *, int, uint8_t *, int );
-int x264_pixel_sad_16x8_vis( uint8_t *, int, uint8_t *, int );
-int x264_pixel_sad_16x16_vis( uint8_t *, int, uint8_t *, int );
+int x264_pixel_sad_8x8_vis  ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+int x264_pixel_sad_8x16_vis ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+int x264_pixel_sad_16x8_vis ( uint8_t *, intptr_t, uint8_t *, intptr_t );
+int x264_pixel_sad_16x16_vis( uint8_t *, intptr_t, uint8_t *, intptr_t );
  
  #endif
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm

index 7622eb69eea72afb14b6d3b0cbed982233c8f3d7..e7c5dc3120758b435906a24574910455452384cc 100644 (file)
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -160,7 +160,7 @@ cextern pw_pixel_max
  
  %macro DEBLOCK_LUMA 0
  ;-----------------------------------------------------------------------------
-; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_v_luma( uint16_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_v_luma, 5,5,8
      %assign pad 5*mmsize+12-(stack_offset&15)
@@ -603,7 +603,7 @@ DEBLOCK_LUMA_64
  
  %if ARCH_X86_64
  ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_v_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  %macro DEBLOCK_LUMA_INTRA_64 0
  cglobal deblock_v_luma_intra, 4,7,16
@@ -653,7 +653,7 @@ cglobal deblock_v_luma_intra, 4,7,16
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_h_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_luma_intra, 4,7,16
      %define t0 m15
@@ -722,7 +722,7 @@ DEBLOCK_LUMA_INTRA_64
  
  %macro DEBLOCK_LUMA_INTRA 0
  ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_v_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_v_luma_intra, 4,7,8
      LUMA_INTRA_INIT 3
@@ -748,7 +748,7 @@ cglobal deblock_v_luma_intra, 4,7,8
      RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_h_luma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_luma_intra, 4,7,8
      LUMA_INTRA_INIT 8
@@ -1090,7 +1090,7 @@ DEBLOCK_LUMA_INTRA
  
  %if ARCH_X86_64
  ;-----------------------------------------------------------------------------
-; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_v_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  %macro DEBLOCK_LUMA 0
  cglobal deblock_v_luma, 5,5,10
@@ -1135,12 +1135,11 @@ cglobal deblock_v_luma, 5,5,10
      RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  INIT_MMX cpuname
  cglobal deblock_h_luma, 5,9
-    movsxd r7, r1d
-    lea    r8, [r7*3]
+    lea    r8, [r1*3]
      lea    r6, [r0-4]
      lea    r5, [r0-4+r8]
  %if WIN64
@@ -1152,14 +1151,15 @@ cglobal deblock_h_luma, 5,9
  %endif
  
      ; transpose 6x16 -> tmp space
-    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r7, r8), pix_tmp
-    lea    r6, [r6+r7*8]
-    lea    r5, [r5+r7*8]
-    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r7, r8), pix_tmp+8
+    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r1, r8), pix_tmp
+    lea    r6, [r6+r1*8]
+    lea    r5, [r5+r1*8]
+    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r1, r8), pix_tmp+8
  
      ; vertical filter
      ; alpha, beta, tc0 are still in r2d, r3d, r4
      ; don't backup r6, r5, r7, r8 because deblock_v_luma_sse2 doesn't use them
+    mov    r7, r1
      lea    r0, [pix_tmp+0x30]
      mov    r1d, 0x10
  %if WIN64
@@ -1203,7 +1203,7 @@ DEBLOCK_LUMA
  
  %macro DEBLOCK_LUMA 2
  ;-----------------------------------------------------------------------------
-; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_v8_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_%1_luma, 5,5
      lea     r4, [r1*3]
@@ -1255,7 +1255,7 @@ cglobal deblock_%1_luma, 5,5
      RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_luma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  INIT_MMX cpuname
  cglobal deblock_h_luma, 0,5
@@ -1452,7 +1452,7 @@ DEBLOCK_LUMA v, 16
  %endif
  
  ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void deblock_v_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_%1_luma_intra, 4,6,16
  %if ARCH_X86_64 == 0
@@ -1514,24 +1514,24 @@ cglobal deblock_%1_luma_intra, 4,6,16
  INIT_MMX cpuname
  %if ARCH_X86_64
  ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void deblock_h_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_luma_intra, 4,9
-    movsxd r7, r1d
-    lea    r8, [r7*3]
+    lea    r8, [r1*3]
      lea    r6, [r0-4]
      lea    r5, [r0-4+r8]
      sub   rsp, 0x88
      %define pix_tmp rsp
  
      ; transpose 8x16 -> tmp space
-    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
-    lea    r6, [r6+r7*8]
-    lea    r5, [r5+r7*8]
-    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
-
-    lea    r0,  [pix_tmp+0x40]
-    mov    r1,  0x10
+    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
+    lea    r6, [r6+r1*8]
+    lea    r5, [r5+r1*8]
+    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
+
+    mov    r7, r1
+    lea    r0, [pix_tmp+0x40]
+    mov    r1, 0x10
      call   deblock_v_luma_intra
  
      ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
@@ -1685,9 +1685,9 @@ cglobal deblock_inter_body
      ret
  
  ;-----------------------------------------------------------------------------
-; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_v_chroma( uint16_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma, 7,7,8
+cglobal deblock_v_chroma, 5,7,8
      FIX_STRIDES r1
      mov         r5, r0
      sub         r0, r1
@@ -1705,7 +1705,7 @@ cglobal deblock_v_chroma, 7,7,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_chroma( uint16_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma, 5,7,8
      add         r1, r1
@@ -1731,7 +1731,7 @@ cglobal deblock_intra_body
      ret
  
  ;-----------------------------------------------------------------------------
-; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_v_chroma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_v_chroma_intra, 4,6,8
      add         r1, r1
@@ -1752,7 +1752,7 @@ cglobal deblock_v_chroma_intra, 4,6,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_h_chroma_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_intra, 4,6,8
      add         r1, r1
@@ -1770,7 +1770,7 @@ cglobal deblock_h_chroma_intra, 4,6,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_intra_mbaff( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_h_chroma_intra_mbaff( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_intra_mbaff, 4,6,8
      add         r1, r1
@@ -1793,7 +1793,7 @@ cglobal deblock_h_chroma_intra_mbaff, 4,6,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_mbaff( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_chroma_mbaff( uint16_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_mbaff, 5,7,8
      add         r1, r1
@@ -1821,7 +1821,7 @@ cglobal deblock_h_chroma_mbaff, 5,7,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_422_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void deblock_h_chroma_422_intra( uint16_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_422_intra, 4,6,8
      add         r1, r1
@@ -1839,7 +1839,7 @@ cglobal deblock_h_chroma_422_intra, 4,6,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_422( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_chroma_422( uint16_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_422, 5,7,8
      add         r1, r1
@@ -1940,7 +1940,7 @@ cglobal chroma_inter_body
      ret
  
  ;-----------------------------------------------------------------------------
-; void deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_v_chroma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_v_chroma, 5,6,8
      CHROMA_V_START
@@ -1955,7 +1955,7 @@ cglobal deblock_v_chroma, 5,6,8
      RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_chroma( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma, 5,7,8
      CHROMA_H_START
@@ -1980,7 +1980,7 @@ DEBLOCK_CHROMA
  %endif
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_mbaff( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void deblock_h_chroma_mbaff( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 )
  ;-----------------------------------------------------------------------------
  %macro DEBLOCK_H_CHROMA_420_MBAFF 0
  cglobal deblock_h_chroma_mbaff, 5,7,8
@@ -2076,7 +2076,7 @@ cglobal chroma_intra_body
  
  %macro DEBLOCK_CHROMA_INTRA 0
  ;-----------------------------------------------------------------------------
-; void deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void deblock_v_chroma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_v_chroma_intra, 4,5,8
      CHROMA_V_START
@@ -2091,7 +2091,7 @@ cglobal deblock_v_chroma_intra, 4,5,8
      RET
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void deblock_h_chroma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  cglobal deblock_h_chroma_intra, 4,6,8
      CHROMA_H_START
@@ -2132,7 +2132,7 @@ DEBLOCK_CHROMA_INTRA
  %endif
  
  ;-----------------------------------------------------------------------------
-; void deblock_h_chroma_intra_mbaff( uint8_t *pix, int stride, int alpha, int beta )
+; void deblock_h_chroma_intra_mbaff( uint8_t *pix, intptr_t stride, int alpha, int beta )
  ;-----------------------------------------------------------------------------
  INIT_MMX mmx2
  cglobal deblock_h_chroma_intra_mbaff, 4,6,8
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm

index 689999eeb207494bbfce7581f0706a517ad6e7ac..923a2cd33a77703294bef7e39519cf4e1317423c 100644 (file)
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -61,8 +61,7 @@ cextern pd_32
  %if WIN64
      DECLARE_REG_TMP 0,1,2,3,4,5,4,5
      %macro AVG_START 0-1 0
-        PROLOGUE 5,7,%1
-        movsxd r5, dword r5m
+        PROLOGUE 6,7,%1
      %endmacro
  %elif UNIX64
      DECLARE_REG_TMP 0,1,2,3,4,5,7,8
@@ -190,7 +189,7 @@ cextern pd_32
  %endif ;HIGH_BIT_DEPTH
  
  ;-----------------------------------------------------------------------------
-; int pixel_avg_weight_w16( pixel *dst, int, pixel *src1, int, pixel *src2, int, int i_weight )
+; int pixel_avg_weight_w16( pixel *dst, intptr_t, pixel *src1, intptr_t, pixel *src2, intptr_t, int i_weight )
  ;-----------------------------------------------------------------------------
  %macro AVG_WEIGHT 1-2 0
  cglobal pixel_avg_weight_w%1
@@ -403,7 +402,7 @@ AVG_WEIGHT 16, 7
  %endif ; HIGH_BIT_DEPTH
  
  ;-----------------------------------------------------------------------------
-;void mc_weight_wX( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, weight_t *weight, int h )
+;void mc_weight_wX( pixel *dst, intptr_t i_dst_stride, pixel *src, intptr_t i_src_stride, weight_t *weight, int h )
  ;-----------------------------------------------------------------------------
  
  %macro WEIGHTER 1
@@ -479,7 +478,7 @@ WEIGHTER 20
  %endmacro
  
  ;-----------------------------------------------------------------------------
-;void mc_offset_wX( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride, weight_t *w, int h )
+;void mc_offset_wX( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, weight_t *w, int h )
  ;-----------------------------------------------------------------------------
  %macro OFFSET 2
  cglobal mc_offset%2_w%1, 6,6
@@ -524,8 +523,8 @@ OFFSETPN  8
  ;=============================================================================
  
  ;-----------------------------------------------------------------------------
-; void pixel_avg_4x4( pixel *dst, int dst_stride,
-;                     pixel *src1, int src1_stride, pixel *src2, int src2_stride, int weight );
+; void pixel_avg_4x4( pixel *dst, intptr_t dst_stride, pixel *src1, intptr_t src1_stride,
+;                     pixel *src2, intptr_t src2_stride, int weight );
  ;-----------------------------------------------------------------------------
  %macro AVGH 2
  cglobal pixel_avg_%1x%2
@@ -540,9 +539,8 @@ cglobal pixel_avg_%1x%2
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; void pixel_avg_w4( pixel *dst, int dst_stride,
-;                    pixel *src1, int src1_stride, pixel *src2, int src2_stride,
-;                    int height, int weight );
+; void pixel_avg_w4( pixel *dst, intptr_t dst_stride, pixel *src1, intptr_t src1_stride,
+;                    pixel *src2, intptr_t src2_stride, int height, int weight );
  ;-----------------------------------------------------------------------------
  
  %macro AVG_FUNC 3
@@ -648,8 +646,8 @@ AVGH  4,  2
  
  %if HIGH_BIT_DEPTH
  ;-----------------------------------------------------------------------------
-; void pixel_avg2_wN( uint16_t *dst,  int dst_stride,
-;                     uint16_t *src1, int src_stride,
+; void pixel_avg2_wN( uint16_t *dst,  intptr_t dst_stride,
+;                     uint16_t *src1, intptr_t src_stride,
  ;                     uint16_t *src2, int height );
  ;-----------------------------------------------------------------------------
  %macro AVG2_W_ONE 1
@@ -832,8 +830,8 @@ cglobal pixel_avg2_w18_sse2, 6,7,6
  
  %if HIGH_BIT_DEPTH == 0
  ;-----------------------------------------------------------------------------
-; void pixel_avg2_w4( uint8_t *dst, int dst_stride,
-;                     uint8_t *src1, int src_stride,
+; void pixel_avg2_w4( uint8_t *dst,  intptr_t dst_stride,
+;                     uint8_t *src1, intptr_t src_stride,
  ;                     uint8_t *src2, int height );
  ;-----------------------------------------------------------------------------
  %macro AVG2_W8 2
@@ -1194,8 +1192,8 @@ AVG16_CACHELINE_LOOP_SSSE3 j, k
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; void mc_copy_w4( uint8_t *dst, int i_dst_stride,
-;                  uint8_t *src, int i_src_stride, int i_height )
+; void mc_copy_w4( uint8_t *dst, intptr_t i_dst_stride,
+;                  uint8_t *src, intptr_t i_src_stride, int i_height )
  ;-----------------------------------------------------------------------------
  INIT_MMX
  cglobal mc_copy_w4_mmx, 4,6
@@ -1250,14 +1248,14 @@ MC_COPY 16
  ; FIXME doesn't cover all pixels in high depth and/or 4:4:4
  
  ;-----------------------------------------------------------------------------
-; void prefetch_fenc( pixel *pix_y, int stride_y,
-;                     pixel *pix_uv, int stride_uv, int mb_x )
+; void prefetch_fenc( pixel *pix_y,  intptr_t stride_y,
+;                     pixel *pix_uv, intptr_t stride_uv, int mb_x )
  ;-----------------------------------------------------------------------------
  
  %macro PREFETCH_FENC 1
  %if ARCH_X86_64
  cglobal prefetch_fenc_%1, 5,5
-    FIX_STRIDES r1d, r3d
+    FIX_STRIDES r1, r3
      and    r4d, 3
      mov    eax, r4d
      imul   r4d, r1d
@@ -1317,11 +1315,11 @@ PREFETCH_FENC 420
  PREFETCH_FENC 422
  
  ;-----------------------------------------------------------------------------
-; void prefetch_ref( pixel *pix, int stride, int parity )
+; void prefetch_ref( pixel *pix, intptr_t stride, int parity )
  ;-----------------------------------------------------------------------------
  INIT_MMX mmx2
  cglobal prefetch_ref, 3,3
-    FIX_STRIDES r1d
+    FIX_STRIDES r1
      dec    r2d
      and    r2d, r1d
      lea    r0,  [r0+r2*8+64*SIZEOF_PIXEL]
@@ -1397,8 +1395,8 @@ cglobal prefetch_ref, 3,3
  %endif ; HIGH_BIT_DEPTH
  
  ;-----------------------------------------------------------------------------
-; void mc_chroma( uint8_t *dstu, uint8_t *dstv, int dst_stride,
-;                 uint8_t *src, int src_stride,
+; void mc_chroma( uint8_t *dstu, uint8_t *dstv, intptr_t dst_stride,
+;                 uint8_t *src, intptr_t src_stride,
  ;                 int dx, int dy,
  ;                 int width, int height )
  ;-----------------------------------------------------------------------------
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm

index 0c15a416c99bd8af3712e6fb162b667f342bb548..12bec5b2796ae6a2b872b9f726a79da3a0089105 100644 (file)
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -155,14 +155,11 @@ cextern pd_ffff
  
  %if HIGH_BIT_DEPTH
  ;-----------------------------------------------------------------------------
-; void hpel_filter_v( uint16_t *dst, uint16_t *src, int16_t *buf, int stride, int width );
+; void hpel_filter_v( uint16_t *dst, uint16_t *src, int16_t *buf, intptr_t stride, intptr_t width );
  ;-----------------------------------------------------------------------------
  %macro HPEL_FILTER 0
  cglobal hpel_filter_v, 5,6,11
-    FIX_STRIDES r3d, r4d
-%if WIN64
-    movsxd     r4, r4d
-%endif
+    FIX_STRIDES r3, r4
      lea        r5, [r1+r3]
      sub        r1, r3
      sub        r1, r3
@@ -179,7 +176,7 @@ cglobal hpel_filter_v, 5,6,11
      %define s30 [pad30]
  %endif
      add        r0, r4
-    lea        r2, [r2+r4]
+    add        r2, r4
      neg        r4
      mova       m7, [pw_pixel_max]
      pxor       m0, m0
@@ -216,12 +213,12 @@ cglobal hpel_filter_v, 5,6,11
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_c( uint16_t *dst, int16_t *buf, int width );
+; void hpel_filter_c( uint16_t *dst, int16_t *buf, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_c, 3,3,10
      add        r2, r2
      add        r0, r2
-    lea        r1, [r1+r2]
+    add        r1, r2
      neg        r2
      mova       m0, [tap1]
      mova       m7, [tap3]
@@ -265,7 +262,7 @@ cglobal hpel_filter_c, 3,3,10
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_h( uint16_t *dst, uint16_t *src, int width );
+; void hpel_filter_h( uint16_t *dst, uint16_t *src, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_h, 3,4,8
      %define src r1+r2
@@ -317,12 +314,9 @@ HPEL_FILTER
  %if HIGH_BIT_DEPTH == 0
  %macro HPEL_V 1
  ;-----------------------------------------------------------------------------
-; void hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width );
+; void hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_v, 5,6,%1
-%if WIN64
-    movsxd   r4, r4d
-%endif
      lea r5, [r1+r3]
      sub r1, r3
      sub r1, r3
@@ -375,7 +369,7 @@ cglobal hpel_filter_v, 5,6,%1
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
+; void hpel_filter_c( uint8_t *dst, int16_t *buf, intptr_t width );
  ;-----------------------------------------------------------------------------
  INIT_MMX
  cglobal hpel_filter_c_mmx2, 3,3
@@ -405,7 +399,7 @@ cglobal hpel_filter_c_mmx2, 3,3
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
+; void hpel_filter_h( uint8_t *dst, uint8_t *src, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_h_mmx2, 3,3
      add r0, r2
@@ -452,7 +446,7 @@ INIT_XMM
  
  %macro HPEL_C 0
  ;-----------------------------------------------------------------------------
-; void hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
+; void hpel_filter_c( uint8_t *dst, int16_t *buf, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_c, 3,3,9
      add r0, r2
@@ -520,7 +514,7 @@ cglobal hpel_filter_c, 3,3,9
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
+; void hpel_filter_h( uint8_t *dst, uint8_t *src, intptr_t width );
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter_h_sse2, 3,3,8
      add r0, r2
@@ -568,7 +562,7 @@ cglobal hpel_filter_h_sse2, 3,3,8
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
+; void hpel_filter_h( uint8_t *dst, uint8_t *src, intptr_t width );
  ;-----------------------------------------------------------------------------
  %macro HPEL_H 0
  cglobal hpel_filter_h, 3,3
@@ -739,15 +733,11 @@ HPEL_H
  %macro HPEL 0
  ;-----------------------------------------------------------------------------
  ; void hpel_filter( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
-;                   uint8_t *src, int stride, int width, int height)
+;                   uint8_t *src, intptr_t stride, int width, int height )
  ;-----------------------------------------------------------------------------
  cglobal hpel_filter, 7,9,16
-%if WIN64
-    movsxd   r4, r4d
-    movsxd   r5, r5d
-%endif
      mov       r7, r3
-    sub       r5, 16
+    sub      r5d, 16
      mov       r8, r1
      and       r7, 15
      sub       r3, r7
@@ -815,21 +805,20 @@ HPEL
  %endif ; !HIGH_BIT_DEPTH
  
  ;-----------------------------------------------------------------------------
-; void plane_copy_core( pixel *dst, int i_dst,
-;                       pixel *src, int i_src, int w, int h)
+; void plane_copy_core( pixel *dst, intptr_t i_dst,
+;                       pixel *src, intptr_t i_src, int w, int h )
  ;-----------------------------------------------------------------------------
  ; assumes i_dst and w are multiples of 16, and i_dst>w
  INIT_MMX
  cglobal plane_copy_core_mmx2, 6,7
-    FIX_STRIDES r1d, r3d, r4d
-    movsxdifnidn r1, r1d
-    movsxdifnidn r3, r3d
+    FIX_STRIDES r1, r3, r4d
+%if HIGH_BIT_DEPTH == 0
      movsxdifnidn r4, r4d
+%endif
      sub    r1,  r4
      sub    r3,  r4
  .loopy:
-    mov    r6d, r4d
-    sub    r6d, 63
+    lea   r6d, [r4-63]
  .loopx:
      prefetchnta [r2+256]
      movq   m0, [r2   ]
@@ -958,22 +947,19 @@ cglobal plane_copy_core_mmx2, 6,7
  
  %macro PLANE_INTERLEAVE 0
  ;-----------------------------------------------------------------------------
-; void plane_copy_interleave_core( uint8_t *dst, int i_dst,
-;                                  uint8_t *srcu, int i_srcu,
-;                                  uint8_t *srcv, int i_srcv, int w, int h )
+; void plane_copy_interleave_core( uint8_t *dst,  intptr_t i_dst,
+;                                  uint8_t *srcu, intptr_t i_srcu,
+;                                  uint8_t *srcv, intptr_t i_srcv, int w, int h )
  ;-----------------------------------------------------------------------------
  ; assumes i_dst and w are multiples of 16, and i_dst>2*w
-cglobal plane_copy_interleave_core, 7,9
-    FIX_STRIDES r1d, r3d, r5d, r6d
+cglobal plane_copy_interleave_core, 6,9
+    mov   r6d, r6m
  %if HIGH_BIT_DEPTH
-    mov   r1m, r1d
-    mov   r3m, r3d
-    mov   r6m, r6d
+    FIX_STRIDES r1, r3, r5, r6d
+    movifnidn r1mp, r1
+    movifnidn r3mp, r3
+    mov  r6m, r6d
  %endif
-    movsxdifnidn r1, r1d
-    movsxdifnidn r3, r3d
-    movsxdifnidn r5, r5d
-    movsxdifnidn r6, r6d
      lea    r0, [r0+r6*2]
      add    r2,  r6
      add    r4,  r6
@@ -1028,10 +1014,10 @@ cglobal plane_copy_interleave_core, 7,9
      RET
  
  ;-----------------------------------------------------------------------------
-; void store_interleave_chroma( uint8_t *dst, int i_dst, uint8_t *srcu, uint8_t *srcv, int height )
+; void store_interleave_chroma( uint8_t *dst, intptr_t i_dst, uint8_t *srcu, uint8_t *srcv, int height )
  ;-----------------------------------------------------------------------------
  cglobal store_interleave_chroma, 5,5
-    FIX_STRIDES r1d
+    FIX_STRIDES r1
  .loop:
      INTERLEAVE r0+ 0, r2+           0, r3+           0, a
      INTERLEAVE r0+r1, r2+FDEC_STRIDEB, r3+FDEC_STRIDEB, a
@@ -1055,20 +1041,17 @@ cglobal store_interleave_chroma, 5,5
  
  %macro PLANE_DEINTERLEAVE 0
  ;-----------------------------------------------------------------------------
-; void plane_copy_deinterleave( pixel *dstu, int i_dstu,
-;                               pixel *dstv, int i_dstv,
-;                               pixel *src, int i_src, int w, int h )
+; void plane_copy_deinterleave( pixel *dstu, intptr_t i_dstu,
+;                               pixel *dstv, intptr_t i_dstv,
+;                               pixel *src,  intptr_t i_src, int w, int h )
  ;-----------------------------------------------------------------------------
  cglobal plane_copy_deinterleave, 6,7
      DEINTERLEAVE_START
      mov    r6d, r6m
-    FIX_STRIDES r1d, r3d, r5d, r6d
+    FIX_STRIDES r1, r3, r5, r6d
  %if HIGH_BIT_DEPTH
      mov    r6m, r6d
  %endif
-    movsxdifnidn r1, r1d
-    movsxdifnidn r3, r3d
-    movsxdifnidn r5, r5d
      add    r0,  r6
      add    r2,  r6
      lea    r4, [r4+r6*2]
@@ -1088,11 +1071,11 @@ cglobal plane_copy_deinterleave, 6,7
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, int i_src, int height )
+; void load_deinterleave_chroma_fenc( pixel *dst, pixel *src, intptr_t i_src, int height )
  ;-----------------------------------------------------------------------------
  cglobal load_deinterleave_chroma_fenc, 4,4
      DEINTERLEAVE_START
-    FIX_STRIDES r2d
+    FIX_STRIDES r2
  .loop:
      DEINTERLEAVE r0+           0, r0+FENC_STRIDEB*1/2, r1+ 0, 1, m4, a
      DEINTERLEAVE r0+FENC_STRIDEB, r0+FENC_STRIDEB*3/2, r1+r2, 1, m4, a
@@ -1103,11 +1086,11 @@ cglobal load_deinterleave_chroma_fenc, 4,4
      REP_RET
  
  ;-----------------------------------------------------------------------------
-; void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, int i_src, int height )
+; void load_deinterleave_chroma_fdec( pixel *dst, pixel *src, intptr_t i_src, int height )
  ;-----------------------------------------------------------------------------
  cglobal load_deinterleave_chroma_fdec, 4,4
      DEINTERLEAVE_START
-    FIX_STRIDES r2d
+    FIX_STRIDES r2
  .loop:
      DEINTERLEAVE r0+           0, r0+FDEC_STRIDEB*1/2, r1+ 0, 0, m4, a
      DEINTERLEAVE r0+FDEC_STRIDEB, r0+FDEC_STRIDEB*3/2, r1+r2, 0, m4, a
@@ -1236,7 +1219,7 @@ MEMZERO
  
  %if HIGH_BIT_DEPTH == 0
  ;-----------------------------------------------------------------------------
-; void integral_init4h( uint16_t *sum, uint8_t *pix, int stride )
+; void integral_init4h( uint16_t *sum, uint8_t *pix, intptr_t stride )
  ;-----------------------------------------------------------------------------
  INIT_XMM
  cglobal integral_init4h_sse4, 3,4
@@ -1291,7 +1274,7 @@ INTEGRAL_INIT8H
  
  %macro INTEGRAL_INIT_8V 0
  ;-----------------------------------------------------------------------------
-; void integral_init8v( uint16_t *sum8, int stride )
+; void integral_init8v( uint16_t *sum8, intptr_t stride )
  ;-----------------------------------------------------------------------------
  cglobal integral_init8v, 3,3
      shl   r1, 1
@@ -1316,7 +1299,7 @@ INIT_XMM sse2
  INTEGRAL_INIT_8V
  
  ;-----------------------------------------------------------------------------
-; void integral_init4v( uint16_t *sum8, uint16_t *sum4, int stride )
+; void integral_init4v( uint16_t *sum8, uint16_t *sum4, intptr_t stride )
  ;-----------------------------------------------------------------------------
  INIT_MMX
  cglobal integral_init4v_mmx, 3,5
@@ -1505,17 +1488,14 @@ cglobal integral_init4v_ssse3, 3,5
  
  ;-----------------------------------------------------------------------------
  ; void frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
-;                              int src_stride, int dst_stride, int width, int height )
+;                              intptr_t src_stride, intptr_t dst_stride, int width, int height )
  ;-----------------------------------------------------------------------------
  %macro FRAME_INIT_LOWRES 0
  cglobal frame_init_lowres_core, 6,7,(12-4*(BIT_DEPTH/9)) ; 8 for HIGH_BIT_DEPTH, 12 otherwise
  %if HIGH_BIT_DEPTH
      shl   dword r6m, 1
-    FIX_STRIDES r5d
+    FIX_STRIDES r5
      shl   dword r7m, 1
-%endif
-%if WIN64
-    movsxd    r5, r5d
  %endif
      ; src += 2*(height-1)*stride + 2*width
      mov      r6d, r8m
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c

index 435f6bd928c34ebffee8a8e4cf3bc49a2ecf3e5d..8e587536b216a9e6851966f433283f2baf1b5ddb 100644 (file)
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -37,22 +37,22 @@
      void func##_sse2 args;\
      void func##_ssse3 args;
  
-DECL_SUF( x264_pixel_avg_16x16, ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_16x8,  ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_8x16,  ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_8x8,   ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_8x4,   ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_4x16,  ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_4x8,   ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_4x4,   ( pixel *, int, pixel *, int, pixel *, int, int ))
-DECL_SUF( x264_pixel_avg_4x2,   ( pixel *, int, pixel *, int, pixel *, int, int ))
+DECL_SUF( x264_pixel_avg_16x16, ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_16x8,  ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_8x16,  ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_8x8,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_8x4,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_4x16,  ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_4x8,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_4x4,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
+DECL_SUF( x264_pixel_avg_4x2,   ( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int ))
  
  #define MC_WEIGHT(w,type) \
-    void x264_mc_weight_w##w##_##type( pixel *,int, pixel *,int, const x264_weight_t *,int );
+    void x264_mc_weight_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int );
  
  #define MC_WEIGHT_OFFSET(w,type) \
-    void x264_mc_offsetadd_w##w##_##type( pixel *,int, pixel *,int, const x264_weight_t *,int ); \
-    void x264_mc_offsetsub_w##w##_##type( pixel *,int, pixel *,int, const x264_weight_t *,int ); \
+    void x264_mc_offsetadd_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \
+    void x264_mc_offsetsub_w##w##_##type( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ); \
      MC_WEIGHT(w,type)
  
  MC_WEIGHT_OFFSET( 4, mmx2 )
@@ -75,74 +75,73 @@ MC_WEIGHT( 20, ssse3 )
  #undef MC_OFFSET
  #undef MC_WEIGHT
  
-void x264_mc_copy_w4_mmx( pixel *, int, pixel *, int, int );
-void x264_mc_copy_w8_mmx( pixel *, int, pixel *, int, int );
-void x264_mc_copy_w8_sse2( pixel *, int, pixel *, int, int );
-void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
-void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
-void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int );
-void x264_prefetch_ref_mmx2( pixel *, int, int );
-void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
-void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
-void x264_plane_copy_interleave_core_mmx2( pixel *dst, int i_dst,
-                                             pixel *srcu, int i_srcu,
-                                             pixel *srcv, int i_srcv, int w, int h );
-void x264_plane_copy_interleave_core_sse2( pixel *dst, int i_dst,
-                                           pixel *srcu, int i_srcu,
-                                           pixel *srcv, int i_srcv, int w, int h );
-void x264_plane_copy_interleave_core_avx( pixel *dst, int i_dst,
-                                           pixel *srcu, int i_srcu,
-                                           pixel *srcv, int i_srcv, int w, int h );
-void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
-                                   pixel *srcu, int i_srcu,
-                                   pixel *srcv, int i_srcv, int w, int h );
-void x264_plane_copy_deinterleave_mmx( pixel *dstu, int i_dstu,
-                                       pixel *dstv, int i_dstv,
-                                       pixel *src, int i_src, int w, int h );
-void x264_plane_copy_deinterleave_sse2( pixel *dstu, int i_dstu,
-                                        pixel *dstv, int i_dstv,
-                                        pixel *src, int i_src, int w, int h );
-void x264_plane_copy_deinterleave_ssse3( uint8_t *dstu, int i_dstu,
-                                         uint8_t *dstv, int i_dstv,
-                                         uint8_t *src, int i_src, int w, int h );
-void x264_plane_copy_deinterleave_avx( uint16_t *dstu, int i_dstu,
-                                         uint16_t *dstv, int i_dstv,
-                                         uint16_t *src, int i_src, int w, int h );
-void x264_store_interleave_chroma_mmx2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
-void x264_store_interleave_chroma_sse2( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
-void x264_store_interleave_chroma_avx( pixel *dst, int i_dst, pixel *srcu, pixel *srcv, int height );
-void x264_load_deinterleave_chroma_fenc_mmx( pixel *dst, pixel *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fenc_sse2( pixel *dst, pixel *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fenc_ssse3( uint8_t *dst, uint8_t *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fenc_avx( uint16_t *dst, uint16_t *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fdec_mmx( pixel *dst, pixel *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fdec_sse2( pixel *dst, pixel *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, int i_src, int height );
-void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, int i_src, int height );
-void *x264_memcpy_aligned_mmx( void * dst, const void * src, size_t n );
-void *x264_memcpy_aligned_sse2( void * dst, const void * src, size_t n );
-void x264_memzero_aligned_mmx( void * dst, int n );
-void x264_memzero_aligned_sse2( void * dst, int n );
-void x264_integral_init4h_sse4( uint16_t *sum, uint8_t *pix, int stride );
-void x264_integral_init8h_sse4( uint16_t *sum, uint8_t *pix, int stride );
-void x264_integral_init8h_avx ( uint16_t *sum, uint8_t *pix, int stride );
-void x264_integral_init4v_mmx( uint16_t *sum8, uint16_t *sum4, int stride );
-void x264_integral_init4v_sse2( uint16_t *sum8, uint16_t *sum4, int stride );
-void x264_integral_init8v_mmx( uint16_t *sum8, int stride );
-void x264_integral_init8v_sse2( uint16_t *sum8, int stride );
-void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
+void x264_mc_copy_w4_mmx  ( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_mc_copy_w8_mmx  ( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_mc_copy_w8_sse2 ( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_mc_copy_w16_mmx ( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_mc_copy_w16_sse2( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_mc_copy_w16_aligned_sse2( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_prefetch_fenc_420_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_prefetch_fenc_422_mmx2( pixel *, intptr_t, pixel *, intptr_t, int );
+void x264_prefetch_ref_mmx2( pixel *, intptr_t, int );
+void x264_plane_copy_core_mmx2( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+void x264_plane_copy_c( pixel *, intptr_t, pixel *, intptr_t, int w, int h );
+void x264_plane_copy_interleave_core_mmx2( pixel *dst,  intptr_t i_dst,
+                                           pixel *srcu, intptr_t i_srcu,
+                                           pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_interleave_core_sse2( pixel *dst,  intptr_t i_dst,
+                                           pixel *srcu, intptr_t i_srcu,
+                                           pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_interleave_core_avx( pixel *dst,  intptr_t i_dst,
+                                          pixel *srcu, intptr_t i_srcu,
+                                          pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_interleave_c( pixel *dst,  intptr_t i_dst,
+                                   pixel *srcu, intptr_t i_srcu,
+                                   pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_deinterleave_mmx( pixel *dstu, intptr_t i_dstu,
+                                       pixel *dstv, intptr_t i_dstv,
+                                       pixel *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_sse2( pixel *dstu, intptr_t i_dstu,
+                                        pixel *dstv, intptr_t i_dstv,
+                                        pixel *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_ssse3( uint8_t *dstu, intptr_t i_dstu,
+                                         uint8_t *dstv, intptr_t i_dstv,
+                                         uint8_t *src,  intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_avx( uint16_t *dstu, intptr_t i_dstu,
+                                       uint16_t *dstv, intptr_t i_dstv,
+                                       uint16_t *src,  intptr_t i_src, int w, int h );
+void x264_store_interleave_chroma_mmx2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+void x264_store_interleave_chroma_sse2( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+void x264_store_interleave_chroma_avx ( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
+void x264_load_deinterleave_chroma_fenc_mmx ( pixel *dst, pixel *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fenc_sse2( pixel *dst, pixel *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fenc_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fenc_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fdec_mmx ( pixel *dst, pixel *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fdec_sse2( pixel *dst, pixel *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
+void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
+void *x264_memcpy_aligned_mmx ( void *dst, const void *src, size_t n );
+void *x264_memcpy_aligned_sse2( void *dst, const void *src, size_t n );
+void x264_memzero_aligned_mmx ( void *dst, size_t n );
+void x264_memzero_aligned_sse2( void *dst, size_t n );
+void x264_integral_init4h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride );
+void x264_integral_init8h_sse4( uint16_t *sum, uint8_t *pix, intptr_t stride );
+void x264_integral_init8h_avx ( uint16_t *sum, uint8_t *pix, intptr_t stride );
+void x264_integral_init4v_mmx  ( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+void x264_integral_init4v_sse2 ( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, intptr_t stride );
+void x264_integral_init8v_mmx ( uint16_t *sum8, intptr_t stride );
+void x264_integral_init8v_sse2( uint16_t *sum8, intptr_t stride );
  void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                        uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
-void x264_mbtree_propagate_cost_avx( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
-                                     uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
+void x264_mbtree_propagate_cost_avx ( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
+                                      uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
  void x264_mbtree_propagate_cost_fma4( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                        uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len );
  
  #define MC_CHROMA(cpu)\
-void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, int i_dst,\
-                           pixel *src, int i_src,\
+void x264_mc_chroma_##cpu( pixel *dstu, pixel *dstv, intptr_t i_dst, pixel *src, intptr_t i_src,\
                             int dx, int dy, int i_width, int i_height );
  MC_CHROMA(mmx2)
  MC_CHROMA(sse2)
@@ -154,7 +153,7 @@ MC_CHROMA(avx_cache64)
  
  #define LOWRES(cpu)\
  void x264_frame_init_lowres_core_##cpu( pixel *src0, pixel *dst0, pixel *dsth, pixel *dstv, pixel *dstc,\
-                                        int src_stride, int dst_stride, int width, int height );
+                                        intptr_t src_stride, intptr_t dst_stride, int width, int height );
  LOWRES(mmx2)
  LOWRES(cache32_mmx2)
  LOWRES(sse2)
@@ -163,7 +162,7 @@ LOWRES(avx)
  LOWRES(xop)
  
  #define PIXEL_AVG_W(width,cpu)\
-void x264_pixel_avg2_w##width##_##cpu( pixel *, int, pixel *, int, pixel *, int );
+void x264_pixel_avg2_w##width##_##cpu( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t );
  /* This declares some functions that don't exist, but that isn't a problem. */
  #define PIXEL_AVG_WALL(cpu)\
  PIXEL_AVG_W(4,cpu); PIXEL_AVG_W(8,cpu); PIXEL_AVG_W(10,cpu); PIXEL_AVG_W(12,cpu); PIXEL_AVG_W(16,cpu); PIXEL_AVG_W(18,cpu); PIXEL_AVG_W(20,cpu);
@@ -177,7 +176,7 @@ PIXEL_AVG_WALL(sse2_misalign)
  PIXEL_AVG_WALL(cache64_ssse3)
  
  #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
-static void (* const x264_pixel_avg_wtab_##instr[6])( pixel *, int, pixel *, int, pixel *, int ) =\
+static void (* const x264_pixel_avg_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t ) =\
  {\
      NULL,\
      x264_pixel_avg2_w4_##name1,\
@@ -216,7 +215,7 @@ PIXEL_AVG_WTAB(cache64_ssse3, mmx2, cache64_mmx2, cache64_ssse3, cache64_ssse3,
  #endif // HIGH_BIT_DEPTH
  
  #define MC_COPY_WTAB(instr, name1, name2, name3)\
-static void (* const x264_mc_copy_wtab_##instr[5])( pixel *, int, pixel *, int, int ) =\
+static void (* const x264_mc_copy_wtab_##instr[5])( pixel *, intptr_t, pixel *, intptr_t, int ) =\
  {\
      NULL,\
      x264_mc_copy_w4_##name1,\
@@ -233,7 +232,7 @@ MC_COPY_WTAB(sse2,mmx,mmx,sse2)
  #endif
  
  #define MC_WEIGHT_WTAB(function, instr, name1, name2, w12version)\
-    static void (* x264_mc_##function##_wtab_##instr[6])( pixel *, int, pixel *, int, const x264_weight_t *, int ) =\
+    static void (* x264_mc_##function##_wtab_##instr[6])( pixel *, intptr_t, pixel *, intptr_t, const x264_weight_t *, int ) =\
  {\
      x264_mc_##function##_w4_##name1,\
      x264_mc_##function##_w4_##name1,\
@@ -332,10 +331,10 @@ static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  
  #define MC_LUMA(name,instr1,instr2)\
-static void mc_luma_##name( pixel *dst,    int i_dst_stride,\
-                  pixel *src[4], int i_src_stride,\
-                  int mvx, int mvy,\
-                  int i_width, int i_height, const x264_weight_t *weight )\
+static void mc_luma_##name( pixel *dst,    intptr_t i_dst_stride,\
+                            pixel *src[4], intptr_t i_src_stride,\
+                            int mvx, int mvy,\
+                            int i_width, int i_height, const x264_weight_t *weight )\
  {\
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
      int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
@@ -367,10 +366,10 @@ MC_LUMA(cache64_ssse3,cache64_ssse3,sse2)
  #endif // !HIGH_BIT_DEPTH
  
  #define GET_REF(name)\
-static pixel *get_ref_##name( pixel *dst,   int *i_dst_stride,\
-                         pixel *src[4], int i_src_stride,\
-                         int mvx, int mvy,\
-                         int i_width, int i_height, const x264_weight_t *weight )\
+static pixel *get_ref_##name( pixel *dst,   intptr_t *i_dst_stride,\
+                              pixel *src[4], intptr_t i_src_stride,\
+                              int mvx, int mvy,\
+                              int i_width, int i_height, const x264_weight_t *weight )\
  {\
      int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
      int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
@@ -410,13 +409,13 @@ GET_REF(cache64_ssse3)
  #endif // !HIGH_BIT_DEPTH
  
  #define HPEL(align, cpu, cpuv, cpuc, cpuh)\
-void x264_hpel_filter_v_##cpuv( pixel *dst, pixel *src, int16_t *buf, int stride, int width);\
-void x264_hpel_filter_c_##cpuc( pixel *dst, int16_t *buf, int width );\
-void x264_hpel_filter_h_##cpuh( pixel *dst, pixel *src, int width );\
+void x264_hpel_filter_v_##cpuv( pixel *dst, pixel *src, int16_t *buf, intptr_t stride, intptr_t width);\
+void x264_hpel_filter_c_##cpuc( pixel *dst, int16_t *buf, intptr_t width );\
+void x264_hpel_filter_h_##cpuh( pixel *dst, pixel *src, intptr_t width );\
  static void x264_hpel_filter_##cpu( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,\
-                             int stride, int width, int height, int16_t *buf )\
+                                    intptr_t stride, int width, int height, int16_t *buf )\
  {\
-    int realign = (intptr_t)src & (align-1);\
+    intptr_t realign = (intptr_t)src & (align-1);\
      src -= realign;\
      dstv -= realign;\
      dstc -= realign;\
@@ -441,9 +440,9 @@ HPEL(16, sse2, sse2, sse2, sse2)
  #else // !HIGH_BIT_DEPTH
  HPEL(16, sse2_amd, mmx2, mmx2, sse2)
  #if ARCH_X86_64
-void x264_hpel_filter_sse2( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height, int16_t *buf );
-void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height, int16_t *buf );
-void x264_hpel_filter_avx( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, int stride, int width, int height, int16_t *buf );
+void x264_hpel_filter_sse2 ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
+void x264_hpel_filter_ssse3( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
+void x264_hpel_filter_avx  ( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, intptr_t stride, int width, int height, int16_t *buf );
  #else
  HPEL(16, sse2, sse2, sse2, sse2)
  HPEL(16, ssse3, ssse3, ssse3, ssse3)
@@ -452,7 +451,7 @@ HPEL(16, avx, avx, avx, avx)
  HPEL(16, sse2_misalign, sse2, sse2_misalign, sse2)
  #endif // HIGH_BIT_DEPTH
  
-static void x264_plane_copy_mmx2( pixel *dst, int i_dst, pixel *src, int i_src, int w, int h )
+static void x264_plane_copy_mmx2( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )
  {
      int c_w = 16/sizeof(pixel) - 1;
      if( w < 256 ) { // tiny resolutions don't want non-temporal hints. dunno the exact threshold.
@@ -470,9 +469,9 @@ static void x264_plane_copy_mmx2( pixel *dst, int i_dst, pixel *src, int i_src,
  }
  
  #define PLANE_INTERLEAVE(cpu) \
-static void x264_plane_copy_interleave_##cpu( pixel *dst, int i_dst,\
-                                              pixel *srcu, int i_srcu,\
-                                              pixel *srcv, int i_srcv, int w, int h )\
+static void x264_plane_copy_interleave_##cpu( pixel *dst,  intptr_t i_dst,\
+                                              pixel *srcu, intptr_t i_srcu,\
+                                              pixel *srcv, intptr_t i_srcv, int w, int h )\
  {\
      if( !(w&15) ) {\
          x264_plane_copy_interleave_core_##cpu( dst, i_dst, srcu, i_srcu, srcv, i_srcv, w, h );\
diff --git a/common/x86/pixel-32.asm b/common/x86/pixel-32.asm

index b3dcdbabd6b973acf09bbe40be572e91b1926f2e..77a87421c90a1aa327f8015523186b6e443216ad 100644 (file)
--- a/common/x86/pixel-32.asm
+++ b/common/x86/pixel-32.asm
@@ -67,7 +67,7 @@ INIT_MMX mmx2
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_sa8d_8x8( uint8_t *, int, uint8_t *, int )
+; int pixel_sa8d_8x8( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  cglobal pixel_sa8d_8x8_internal
      push   r0
@@ -362,8 +362,8 @@ cglobal intra_sa8d_x3_8x8, 2,3
  
  
  ;-----------------------------------------------------------------------------
-; void pixel_ssim_4x4x2_core( const uint8_t *pix1, int stride1,
-;                             const uint8_t *pix2, int stride2, int sums[2][4] )
+; void pixel_ssim_4x4x2_core( const uint8_t *pix1, intptr_t stride1,
+;                             const uint8_t *pix2, intptr_t stride2, int sums[2][4] )
  ;-----------------------------------------------------------------------------
  cglobal pixel_ssim_4x4x2_core, 0,5
      mov       r1, r1m
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm

index 6f3076cfb8e37b40e2781a35acc319fc1bf5adea..06737ab126378f1381660745d307749ab8a73339 100644 (file)
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -144,7 +144,7 @@ cextern hsub_mul
  
  %if HIGH_BIT_DEPTH
  ;-----------------------------------------------------------------------------
-; int pixel_ssd_MxN( uint16_t *, int, uint16_t *, int )
+; int pixel_ssd_MxN( uint16_t *, intptr_t, uint16_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SSD_ONE 2
  cglobal pixel_ssd_%1x%2, 4,5,6
@@ -361,7 +361,7 @@ SSD_ONE    16, 16
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_ssd_16x16( uint8_t *, int, uint8_t *, int )
+; int pixel_ssd_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SSD 2
  %if %1 != %2
@@ -466,7 +466,7 @@ SSD  8,  4
  %endif ; !HIGH_BIT_DEPTH
  
  ;-----------------------------------------------------------------------------
-; void pixel_ssd_nv12_core( uint16_t *pixuv1, int stride1, uint16_t *pixuv2, int stride2,
+; void pixel_ssd_nv12_core( uint16_t *pixuv1, intptr_t stride1, uint16_t *pixuv2, intptr_t stride2,
  ;                           int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
  ;
  ; The maximum width this function can handle without risk of overflow is given
@@ -560,7 +560,7 @@ cglobal pixel_ssd_nv12_core, 6,7,7
  
  %if HIGH_BIT_DEPTH == 0
  ;-----------------------------------------------------------------------------
-; void pixel_ssd_nv12_core( uint8_t *pixuv1, int stride1, uint8_t *pixuv2, int stride2,
+; void pixel_ssd_nv12_core( uint8_t *pixuv1, intptr_t stride1, uint8_t *pixuv2, intptr_t stride2,
  ;                           int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
  ;
  ; This implementation can potentially overflow on image widths >= 11008 (or
@@ -697,7 +697,7 @@ SSD_NV12
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_var_wxh( uint8_t *, int )
+; int pixel_var_wxh( uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  INIT_MMX mmx2
  cglobal pixel_var_16x16, 2,3
@@ -820,7 +820,7 @@ VAR
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_var2_8x8( pixel *, int, pixel *, int, int * )
+; int pixel_var2_8x8( pixel *, intptr_t, pixel *, intptr_t, int * )
  ;-----------------------------------------------------------------------------
  %macro VAR2_8x8_MMX 2
  cglobal pixel_var2_8x%1, 5,6
@@ -1128,7 +1128,7 @@ VAR2_8x8_SSSE3 16, 7
  ; for small blocks on x86_32, modify pixel pointer instead.
  
  ;-----------------------------------------------------------------------------
-; int pixel_satd_16x16( uint8_t *, int, uint8_t *, int )
+; int pixel_satd_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  INIT_MMX mmx2
  cglobal pixel_satd_16x4_internal
@@ -1335,7 +1335,7 @@ cglobal pixel_satd_4x4, 4,6
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_satd_8x4( uint8_t *, int, uint8_t *, int )
+; int pixel_satd_8x4( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SATDS_SSE2 0
  %if cpuflag(ssse3)
@@ -1476,7 +1476,7 @@ cglobal pixel_satd_8x4, 4,6,8
  
  %if ARCH_X86_64
  ;-----------------------------------------------------------------------------
-; int pixel_sa8d_8x8( uint8_t *, int, uint8_t *, int )
+; int pixel_sa8d_8x8( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  cglobal pixel_sa8d_8x8_internal
      lea  r6, [r0+4*r1]
@@ -3841,8 +3841,8 @@ HADAMARD_AC_SSE2
  ;=============================================================================
  
  ;-----------------------------------------------------------------------------
-; void pixel_ssim_4x4x2_core( const uint8_t *pix1, int stride1,
-;                             const uint8_t *pix2, int stride2, int sums[2][4] )
+; void pixel_ssim_4x4x2_core( const uint8_t *pix1, intptr_t stride1,
+;                             const uint8_t *pix2, intptr_t stride2, int sums[2][4] )
  ;-----------------------------------------------------------------------------
  %macro SSIM_ITER 1
  %if HIGH_BIT_DEPTH
@@ -4006,8 +4006,10 @@ SSIM
  ;=============================================================================
  
  %macro ADS_START 0
-%if WIN64
+%if UNIX64
      movsxd  r5,  r5d
+%else
+    mov     r5d, r5m
  %endif
      mov     r0d, r5d
      lea     r6,  [r4+r5+15]
@@ -4030,7 +4032,7 @@ SSIM
  ;                 uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
  ;-----------------------------------------------------------------------------
  INIT_MMX mmx2
-cglobal pixel_ads4, 6,7
+cglobal pixel_ads4, 5,7
      movq    mm6, [r0]
      movq    mm4, [r0+8]
      pshufw  mm7, mm6, 0
@@ -4061,7 +4063,7 @@ cglobal pixel_ads4, 6,7
      movd    [r6], mm1
      ADS_END 1
  
-cglobal pixel_ads2, 6,7
+cglobal pixel_ads2, 5,7
      movq    mm6, [r0]
      pshufw  mm5, r6m, 0
      pshufw  mm7, mm6, 0
@@ -4082,7 +4084,7 @@ cglobal pixel_ads2, 6,7
      movd    [r6], mm4
      ADS_END 1
  
-cglobal pixel_ads1, 6,7
+cglobal pixel_ads1, 5,7
      pshufw  mm7, [r0], 0
      pshufw  mm6, r6m, 0
      ADS_START
@@ -4104,7 +4106,7 @@ cglobal pixel_ads1, 6,7
      ADS_END 2
  
  %macro ADS_XMM 0
-cglobal pixel_ads4, 6,7,12
+cglobal pixel_ads4, 5,7,12
      movdqa  xmm4, [r0]
      pshuflw xmm7, xmm4, 0
      pshuflw xmm6, xmm4, q2222
@@ -4168,7 +4170,7 @@ cglobal pixel_ads4, 6,7,12
  %endif ; ARCH
      ADS_END 2
  
-cglobal pixel_ads2, 6,7,8
+cglobal pixel_ads2, 5,7,8
      movq    xmm6, [r0]
      movd    xmm5, r6m
      pshuflw xmm7, xmm6, 0
@@ -4193,7 +4195,7 @@ cglobal pixel_ads2, 6,7,8
      movq    [r6], xmm1
      ADS_END 2
  
-cglobal pixel_ads1, 6,7,8
+cglobal pixel_ads1, 5,7,8
      movd    xmm7, [r0]
      movd    xmm6, r6m
      pshuflw xmm7, xmm7, 0
diff --git a/common/x86/pixel.h b/common/x86/pixel.h

index 11823f0855c23108d17194017ade9a7b209d5a88..eeea9c7057bdea7b2fb890d461b26d7c970c49e1 100644 (file)
--- a/common/x86/pixel.h
+++ b/common/x86/pixel.h
@@ -39,11 +39,11 @@
      ret x264_pixel_##name##_4x4_##suffix args;\
  
  #define DECL_X1( name, suffix ) \
-    DECL_PIXELS( int, name, suffix, ( pixel *, int, pixel *, int ) )
+    DECL_PIXELS( int, name, suffix, ( pixel *, intptr_t, pixel *, intptr_t ) )
  
  #define DECL_X4( name, suffix ) \
-    DECL_PIXELS( void, name##_x3, suffix, ( pixel *, pixel *, pixel *, pixel *, int, int * ) )\
-    DECL_PIXELS( void, name##_x4, suffix, ( pixel *, pixel *, pixel *, pixel *, pixel *, int, int * ) )
+    DECL_PIXELS( void, name##_x3, suffix, ( pixel *, pixel *, pixel *, pixel *, intptr_t, int * ) )\
+    DECL_PIXELS( void, name##_x4, suffix, ( pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int * ) )
  
  DECL_X1( sad, mmx2 )
  DECL_X1( sad, sse2 )
@@ -84,16 +84,16 @@ DECL_X4( sad, cache64_mmx2 );
  DECL_X4( sad, cache64_sse2 );
  DECL_X4( sad, cache64_ssse3 );
  
-DECL_PIXELS( uint64_t, var, mmx2, ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, var, sse2, ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, var, avx,  ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, var, xop,  ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, mmx2,  ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, sse2,  ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, ssse3, ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, sse4,  ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, avx,   ( pixel *pix, int i_stride ))
-DECL_PIXELS( uint64_t, hadamard_ac, xop,   ( pixel *pix, int i_stride ))
+DECL_PIXELS( uint64_t, var, mmx2, ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, var, sse2, ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, var, avx,  ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, var, xop,  ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, mmx2,  ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, sse2,  ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, ssse3, ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, sse4,  ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, avx,   ( pixel *pix, intptr_t i_stride ))
+DECL_PIXELS( uint64_t, hadamard_ac, xop,   ( pixel *pix, intptr_t i_stride ))
  
  
  void x264_intra_satd_x3_4x4_mmx2   ( pixel   *, pixel   *, int * );
@@ -130,35 +130,35 @@ int x264_intra_sad_x9_8x8_ssse3 ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, u
  int x264_intra_sad_x9_8x8_sse4  ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
  int x264_intra_sad_x9_8x8_avx   ( uint8_t *, uint8_t *, uint8_t *, uint16_t *, uint16_t * );
  
-void x264_pixel_ssd_nv12_core_mmx2( pixel *pixuv1, int stride1,
-                                    pixel *pixuv2, int stride2, int width,
+void x264_pixel_ssd_nv12_core_mmx2( pixel *pixuv1, intptr_t stride1,
+                                    pixel *pixuv2, intptr_t stride2, int width,
                                      int height, uint64_t *ssd_u, uint64_t *ssd_v );
-void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, int stride1,
-                                    pixel *pixuv2, int stride2, int width,
+void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, intptr_t stride1,
+                                    pixel *pixuv2, intptr_t stride2, int width,
                                      int height, uint64_t *ssd_u, uint64_t *ssd_v );
-void x264_pixel_ssd_nv12_core_avx ( pixel *pixuv1, int stride1,
-                                    pixel *pixuv2, int stride2, int width,
+void x264_pixel_ssd_nv12_core_avx ( pixel *pixuv1, intptr_t stride1,
+                                    pixel *pixuv2, intptr_t stride2, int width,
                                      int height, uint64_t *ssd_u, uint64_t *ssd_v );
-void x264_pixel_ssim_4x4x2_core_mmx2( const uint8_t *pix1, int stride1,
-                                      const uint8_t *pix2, int stride2, int sums[2][4] );
-void x264_pixel_ssim_4x4x2_core_sse2( const pixel *pix1, int stride1,
-                                      const pixel *pix2, int stride2, int sums[2][4] );
-void x264_pixel_ssim_4x4x2_core_avx ( const pixel *pix1, int stride1,
-                                      const pixel *pix2, int stride2, int sums[2][4] );
+void x264_pixel_ssim_4x4x2_core_mmx2( const uint8_t *pix1, intptr_t stride1,
+                                      const uint8_t *pix2, intptr_t stride2, int sums[2][4] );
+void x264_pixel_ssim_4x4x2_core_sse2( const pixel *pix1, intptr_t stride1,
+                                      const pixel *pix2, intptr_t stride2, int sums[2][4] );
+void x264_pixel_ssim_4x4x2_core_avx ( const pixel *pix1, intptr_t stride1,
+                                      const pixel *pix2, intptr_t stride2, int sums[2][4] );
  float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width );
-float x264_pixel_ssim_end4_avx( int sum0[5][4], int sum1[5][4], int width );
-int  x264_pixel_var2_8x8_mmx2( pixel *, int, pixel *, int, int * );
-int  x264_pixel_var2_8x8_sse2( pixel *, int, pixel *, int, int * );
-int  x264_pixel_var2_8x8_ssse3( uint8_t *, int, uint8_t *, int, int * );
-int  x264_pixel_var2_8x8_xop( uint8_t *, int, uint8_t *, int, int * );
-int  x264_pixel_var2_8x16_mmx2( pixel *, int, pixel *, int, int * );
-int  x264_pixel_var2_8x16_sse2( pixel *, int, pixel *, int, int * );
-int  x264_pixel_var2_8x16_ssse3( uint8_t *, int, uint8_t *, int, int * );
-int  x264_pixel_var2_8x16_xop( uint8_t *, int, uint8_t *, int, int * );
-int  x264_pixel_vsad_mmx2( pixel *src, int stride, int height );
-int  x264_pixel_vsad_sse2( pixel *src, int stride, int height );
-int  x264_pixel_vsad_ssse3( pixel *src, int stride, int height );
-int  x264_pixel_vsad_xop( pixel *src, int stride, int height );
+float x264_pixel_ssim_end4_avx ( int sum0[5][4], int sum1[5][4], int width );
+int  x264_pixel_var2_8x8_mmx2  ( pixel *,   intptr_t, pixel *,   intptr_t, int * );
+int  x264_pixel_var2_8x8_sse2  ( pixel *,   intptr_t, pixel *,   intptr_t, int * );
+int  x264_pixel_var2_8x8_ssse3 ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int  x264_pixel_var2_8x8_xop   ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int  x264_pixel_var2_8x16_mmx2 ( pixel *,   intptr_t, pixel *,   intptr_t, int * );
+int  x264_pixel_var2_8x16_sse2 ( pixel *,   intptr_t, pixel *,   intptr_t, int * );
+int  x264_pixel_var2_8x16_ssse3( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int  x264_pixel_var2_8x16_xop  ( uint8_t *, intptr_t, uint8_t *, intptr_t, int * );
+int  x264_pixel_vsad_mmx2 ( pixel *src, intptr_t stride, int height );
+int  x264_pixel_vsad_sse2 ( pixel *src, intptr_t stride, int height );
+int  x264_pixel_vsad_ssse3( pixel *src, intptr_t stride, int height );
+int  x264_pixel_vsad_xop  ( pixel *src, intptr_t stride, int height );
  
  #define DECL_ADS( size, suffix ) \
  int x264_pixel_ads##size##_##suffix( int enc_dc[size], uint16_t *sums, int delta,\
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm

index 744b836d21fefa3bbb2f3c3c5a1b0ad56166cd4f..883f0018c520b9d0532e204df3c09ed0bf00fe25 100644 (file)
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -759,6 +759,7 @@ OPTIMIZE_CHROMA_2x2_DC
  %macro DENOISE_DCT 0
  cglobal denoise_dct, 4,4,8
      pxor      m6, m6
+    movsxdifnidn r3, r3d
  .loop:
      mova      m2, [r0+r3*4-2*mmsize]
      mova      m3, [r0+r3*4-1*mmsize]
@@ -804,6 +805,7 @@ DENOISE_DCT
  %macro DENOISE_DCT 0
  cglobal denoise_dct, 4,4,7
      pxor      m6, m6
+    movsxdifnidn r3, r3d
  .loop:
      mova      m2, [r0+r3*2-2*mmsize]
      mova      m3, [r0+r3*2-1*mmsize]
diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm

index 58f4273ec6801da94d9e31ee280ec01ad77076dd..50ad2d7276ce2f3d573ee4beddef3064632ce883 100644 (file)
--- a/common/x86/sad-a.asm
+++ b/common/x86/sad-a.asm
@@ -80,7 +80,7 @@ cextern sw_64
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_sad_16x16( uint8_t *, int, uint8_t *, int )
+; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SAD 2
  cglobal pixel_sad_%1x%2_mmx2, 4,4
@@ -116,7 +116,7 @@ SAD  4,  4
  
  %macro SAD_W16 0
  ;-----------------------------------------------------------------------------
-; int pixel_sad_16x16( uint8_t *, int, uint8_t *, int )
+; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  cglobal pixel_sad_16x16, 4,4,8
      movu    m0, [r2]
@@ -183,7 +183,7 @@ cglobal pixel_sad_16x16, 4,4,8
      SAD_END_SSE2
  
  ;-----------------------------------------------------------------------------
-; int pixel_sad_16x8( uint8_t *, int, uint8_t *, int )
+; int pixel_sad_16x8( uint8_t *, intptr_t, uint8_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  cglobal pixel_sad_16x8, 4,4
      movu    m0, [r2]
@@ -257,7 +257,7 @@ cglobal pixel_sad_8x16_sse2, 4,4
      RET
  
  ;-----------------------------------------------------------------------------
-; void pixel_vsad( pixel *src, int stride );
+; int pixel_vsad( pixel *src, intptr_t stride, int height );
  ;-----------------------------------------------------------------------------
  
  %if ARCH_X86_64 == 0
@@ -867,14 +867,10 @@ INTRA_SAD16
  
  ;-----------------------------------------------------------------------------
  ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
-;                          uint8_t *pix2, int i_stride, int scores[3] )
+;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
  ;-----------------------------------------------------------------------------
  %macro SAD_X 3
  cglobal pixel_sad_x%1_%2x%3_mmx2, %1+2, %1+2
-%if WIN64
-    %assign i %1+1
-    movsxd r %+ i, r %+ i %+ d
-%endif
      SAD_X%1_2x%2P 1
  %rep %3/2-1
      SAD_X%1_2x%2P 0
@@ -1190,14 +1186,10 @@ SAD_X 4,  4,  4
  
  ;-----------------------------------------------------------------------------
  ; void pixel_sad_x3_16x16( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,
-;                          uint8_t *pix2, int i_stride, int scores[3] )
+;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
  ;-----------------------------------------------------------------------------
  %macro SAD_X_SSE2 3
  cglobal pixel_sad_x%1_%2x%3, 2+%1,2+%1,9
-%if WIN64
-    %assign i %1+1
-    movsxd r %+ i, r %+ i %+ d
-%endif
      SAD_X%1_2x%2P_SSE2 1
  %rep %3/2-1
      SAD_X%1_2x%2P_SSE2 0
@@ -1485,9 +1477,6 @@ cglobal pixel_sad_x4_%1x%2_cache%3_%6
  %if ARCH_X86_64
      PROLOGUE 6,9
      mov  r8,  r6mp
-%if WIN64
-    movsxd r5, r5d
-%endif
      push r4
      push r3
      push r2
diff --git a/common/x86/sad16-a.asm b/common/x86/sad16-a.asm

index 39f7259817976064c85a69f23b86fda61869dfa2..273d0a01aee3c98c8f1b28fe07103390b9dd147f 100644 (file)
--- a/common/x86/sad16-a.asm
+++ b/common/x86/sad16-a.asm
@@ -87,7 +87,7 @@ cextern pw_8
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_sad_NxM( uint16_t *, int, uint16_t *, int )
+; int pixel_sad_NxM( uint16_t *, intptr_t, uint16_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SAD_MMX 3
  cglobal pixel_sad_%1x%2, 4,4
@@ -152,7 +152,7 @@ SAD_MMX  4,  4, 2
  %endmacro
  
  ;-----------------------------------------------------------------------------
-; int pixel_sad_NxM( uint16_t *, int, uint16_t *, int )
+; int pixel_sad_NxM( uint16_t *, intptr_t, uint16_t *, intptr_t )
  ;-----------------------------------------------------------------------------
  %macro SAD_XMM 2
  cglobal pixel_sad_%1x%2, 4,4,8
@@ -402,15 +402,12 @@ PIXEL_VSAD
  
  ;-----------------------------------------------------------------------------
  ; void pixel_sad_xK_MxN( uint16_t *fenc, uint16_t *pix0, uint16_t *pix1,
-;                        uint16_t *pix2, int i_stride, int scores[3] )
+;                        uint16_t *pix2, intptr_t i_stride, int scores[3] )
  ;-----------------------------------------------------------------------------
  %macro SAD_X 3
  cglobal pixel_sad_x%1_%2x%3, 6,7,XMM_REGS
      %assign regnum %1+1
      %xdefine STRIDE r %+ regnum
-%if WIN64
-    movsxd STRIDE, STRIDE %+ d
-%endif
      mov     r6, %3/2-1
      SAD_X%1_ONE_START
      SAD_X%1_ONE 2*FENC_STRIDE, 2*STRIDE
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm

index f9be2cf01ab90afa8268d58bc1a4b25309f097b0..4b2229ece7809a568f1972a29ee32a99b65e9407 100644 (file)
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -52,11 +52,6 @@
      %define mangle(x) x
  %endif
  
-; FIXME: All of the 64bit asm functions that take a stride as an argument
-; via register, assume that the high dword of that register is filled with 0.
-; This is true in practice (since we never do any 64bit arithmetic on strides,
-; and x264's strides are all positive), but is not guaranteed by the ABI.
-
  ; Name of the .rodata section.
  ; Kludge: Something on OS X fails to align .rodata even given an align attribute,
  ; so use a different read-only section.
diff --git a/encoder/analyse.c b/encoder/analyse.c

index 51691c93d893b430a9ce897a1d5c3d6172619f02..e47787718dac7d5810fc3a914902c01e8d3ba96a 100644 (file)
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -1987,7 +1987,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
      ALIGNED_ARRAY_16( pixel, pix0,[16*16] );
      ALIGNED_ARRAY_16( pixel, pix1,[16*16] );
      pixel *src0, *src1;
-    int stride0 = 16, stride1 = 16;
+    intptr_t stride0 = 16, stride1 = 16;
      int i_ref, i_mvc;
      ALIGNED_4( int16_t mvc[9][2] );
      int try_skip = a->b_try_skip;
@@ -2304,7 +2304,7 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
          int y8 = i>>1;
          int i_part_cost;
          int i_part_cost_bi;
-        int stride[2] = {8,8};
+        intptr_t stride[2] = {8,8};
          pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_8x8;
@@ -2393,7 +2393,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
          int y8 = i>>1;
          int i_part_cost;
          int i_part_cost_bi = 0;
-        int stride[2] = {8,8};
+        intptr_t stride[2] = {8,8};
          pixel *src[2];
  
          for( int l = 0; l < 2; l++ )
@@ -2464,7 +2464,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
      {
          int i_part_cost;
          int i_part_cost_bi = 0;
-        int stride[2] = {16,16};
+        intptr_t stride[2] = {16,16};
          pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_16x8;
@@ -2558,7 +2558,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
      {
          int i_part_cost;
          int i_part_cost_bi = 0;
-        int stride[2] = {8,8};
+        intptr_t stride[2] = {8,8};
          pixel *src[2];
          x264_me_t m;
          m.i_pixel = PIXEL_8x16;
diff --git a/encoder/me.c b/encoder/me.c

index ccc7ad40ade5a38e120c0f15b60f2777c6c49244..7b11e01d5f4023def4043fa63ded5c16646ee1ec 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -70,7 +70,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
  
  #define COST_MV_HPEL( mx, my ) \
  { \
-    int stride2 = 16; \
+    intptr_t stride2 = 16; \
      pixel *src = h->mc.get_ref( pix, &stride2, m->p_fref, stride, mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, src, stride2 ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
@@ -775,7 +775,7 @@ void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh
  
  #define COST_MV_SAD( mx, my ) \
  { \
-    int stride = 16; \
+    intptr_t stride = 16; \
      pixel *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.fpelcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
@@ -785,7 +785,7 @@ void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh
  #define COST_MV_SATD( mx, my, dir ) \
  if( b_refine_qpel || (dir^1) != odir ) \
  { \
-    int stride = 16; \
+    intptr_t stride = 16; \
      pixel *src = h->mc.get_ref( pix, &stride, &m->p_fref[0], m->i_stride[0], mx, my, bw, bh, &m->weight[0] ); \
      int cost = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
               + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
@@ -854,7 +854,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
      {
          int omx = bmx, omy = bmy;
          int costs[4];
-        int stride = 64; // candidates are either all hpel or all qpel, so one stride is enough
+        intptr_t stride = 64; // candidates are either all hpel or all qpel, so one stride is enough
          pixel *src0, *src1, *src2, *src3;
          src0 = h->mc.get_ref( pix,    &stride, m->p_fref, m->i_stride[0], omx, omy-2, bw, bh+1, &m->weight[0] );
          src2 = h->mc.get_ref( pix+32, &stride, m->p_fref, m->i_stride[0], omx-2, omy, bw+4, bh, &m->weight[0] );
@@ -988,7 +988,7 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
      int ref1 = h->mb.cache.ref[1][s8];
      const int mv0y_offset = chroma_v_shift & MB_INTERLACED & ref0 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
      const int mv1y_offset = chroma_v_shift & MB_INTERLACED & ref1 ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
-    int stride[3][2][9];
+    intptr_t stride[3][2][9];
      int bm0x = m0->mv[0];
      int bm0y = m0->mv[1];
      int bm1x = m1->mv[0];
diff --git a/encoder/slicetype.c b/encoder/slicetype.c

index c693b3f4d5ff9059d1b3f63894ed5a02edda9644..65ea761cb2f652fe916c0c7d9e08cca3ef916195 100644 (file)
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -505,7 +505,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
          } \
          else \
          { \
-            int stride1 = 16, stride2 = 16; \
+            intptr_t stride1 = 16, stride2 = 16; \
              pixel *src1, *src2; \
              src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
                                    (mv0)[0], (mv0)[1], 8, 8, w ); \
diff --git a/tools/checkasm-a.asm b/tools/checkasm-a.asm

index a0b85fac294fde086cea8d92a0f5b1ad04dfb805..47a4f65e5d50f1ff7d640eafd2f85c404495ba8b 100644 (file)
--- a/tools/checkasm-a.asm
+++ b/tools/checkasm-a.asm
@@ -4,6 +4,7 @@
  ;* Copyright (C) 2008-2012 x264 project
  ;*
  ;* Authors: Loren Merritt <lorenm@u.washington.edu>
+;*          Henrik Gramner <hengar-6@student.ltu.se>
  ;*
  ;* This program is free software; you can redistribute it and/or modify
  ;* it under the terms of the GNU General Public License as published by
@@ -29,7 +30,7 @@ SECTION_RODATA
  
  error_message: db "failed to preserve register", 0
  
-%if WIN64
+%if ARCH_X86_64
  ; just random numbers to reduce the chance of incidental match
  ALIGN 16
  x6:  ddq 0x79445c159ce790641a1b2550a612b48c
@@ -60,64 +61,107 @@ cextern_naked puts
  ; (max_args % 4) must equal 3 for stack alignment
  %define max_args 15
  
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; void x264_checkasm_stack_clobber( uint64_t clobber, ... )
+;-----------------------------------------------------------------------------
+cglobal checkasm_stack_clobber, 1,2
+    ; Clobber the stack with junk below the stack pointer
+    %define size (max_args+6)*8
+    SUB  rsp, size
+    mov   r1, size-8
+.loop:
+    mov [rsp+r1], r0
+    sub   r1, 8
+    jge .loop
+    ADD  rsp, size
+    RET
+
  %if WIN64
+    %assign free_regs 7
+%else
+    %assign free_regs 9
+%endif
  
  ;-----------------------------------------------------------------------------
  ; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
  ;-----------------------------------------------------------------------------
  INIT_XMM
-cglobal checkasm_call, 4,15,16
-    SUB  rsp, max_args*8
+cglobal checkasm_call, 2,15,16
+    SUB  rsp, max_args*8+16
      mov  r6, r0
-    mov  [rsp+stack_offset+16], r1
-    mov  r0, r2
-    mov  r1, r3
-    mov r2d, r4m ; FIXME truncates pointer
-    mov r3d, r5m ; FIXME truncates pointer
-%assign i 4
-%rep max_args-4
-    mov  r4, [rsp+stack_offset+8+(i+2)*8]
-    mov  [rsp+i*8], r4
-    %assign i i+1
-%endrep
-%assign i 6
-%rep 16-6
-    mova m %+ i, [x %+ i]
-    %assign i i+1
-%endrep
-%assign i 7
-%rep 15-7
+    mov  [rsp+max_args*8], r1
+
+    ; All arguments have been pushed on the stack instead of registers in order to
+    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+    mov  r0, r6mp
+    mov  r1, r7mp
+    mov  r2, r8mp
+    mov  r3, r9mp
+%if UNIX64
+    mov  r4, r10mp
+    mov  r5, r11mp
+    %assign i 6
+    %rep max_args-6
+        mov  r9, [rsp+stack_offset+(i+1)*8]
+        mov  [rsp+(i-6)*8], r9
+        %assign i i+1
+    %endrep
+%else
+    %assign i 4
+    %rep max_args-4
+        mov  r9, [rsp+stack_offset+(i+7)*8]
+        mov  [rsp+i*8], r9
+        %assign i i+1
+    %endrep
+%endif
+
+%if WIN64
+    %assign i 6
+    %rep 16-6
+        mova m %+ i, [x %+ i]
+        %assign i i+1
+    %endrep
+%endif
+
+%assign i 14
+%rep 15-free_regs
      mov  r %+ i, [n %+ i]
-    %assign i i+1
+    %assign i i-1
  %endrep
      call r6
-%assign i 7
-%rep 15-7
+%assign i 14
+%rep 15-free_regs
      xor  r %+ i, [n %+ i]
-    or   r7, r %+ i
-    %assign i i+1
-%endrep
-%assign i 6
-%rep 16-6
-    pxor m %+ i, [x %+ i]
-    por  m6, m %+ i
-    %assign i i+1
+    or  r14, r %+ i
+    %assign i i-1
  %endrep
+
+%if WIN64
+    %assign i 6
+    %rep 16-6
+        pxor m %+ i, [x %+ i]
+        por  m6, m %+ i
+        %assign i i+1
+    %endrep
      packsswb m6, m6
      movq r5, m6
-    or   r7, r5
+    or  r14, r5
+%endif
+
      jz .ok
-    mov  r4, rax
+    mov  r9, rax
      lea  r0, [error_message]
      call puts
-    mov  r1, [rsp+stack_offset+16]
+    mov  r1, [rsp+max_args*8]
      mov  dword [r1], 0
-    mov  rax, r4
+    mov  rax, r9
  .ok:
-    ADD  rsp, max_args*8
+    ADD  rsp, max_args*8+16
      RET
  
-%elif ARCH_X86_64 == 0
+%else
  
  ; just random numbers to reduce the chance of incidental match
  %define n3 dword 0x6549315c
diff --git a/tools/checkasm.c b/tools/checkasm.c

index 144d28fa84014a107298eb0a96c1bda6a6c53f98..b9b6b8ae31995124ba8a7150e6a31e58f82df8ce 100644 (file)
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -196,17 +196,33 @@ static void print_bench(void)
  
  #if ARCH_X86 || ARCH_X86_64
  int x264_stack_pagealign( int (*func)(), int align );
+
+/* detect when callee-saved regs aren't saved
+ * needs an explicit asm check because it only sometimes crashes in normal use. */
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
  #else
  #define x264_stack_pagealign( func, align ) func()
  #endif
  
  #define call_c1(func,...) func(__VA_ARGS__)
  
-#if ARCH_X86 || defined(_WIN64)
-/* detect when callee-saved regs aren't saved.
- * needs an explicit asm check because it only sometimes crashes in normal use. */
-intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
-#define call_a1(func,...) x264_checkasm_call((intptr_t(*)())func, &ok, __VA_ARGS__)
+#if ARCH_X86_64
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through x264_checkasm_call with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
+ * upper half of the 64-bit register location on the stack will now contain junk. Note that
+ * this is dependent on compiler behaviour and that interrupts etc. at the wrong time may
+ * overwrite the junk written to the stack so there's no guarantee that it will always
+ * detect all functions that assume zero-extension.
+ */
+void x264_checkasm_stack_clobber( uint64_t clobber, ... );
+#define call_a1(func,...) ({ \
+    uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
+    x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
+    x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
+#elif ARCH_X86
+#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
  #else
  #define call_a1 call_c1
  #endif
@@ -291,8 +307,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              used_asm = 1; \
              for( int j = 0; j < 64; j++ ) \
              { \
-                res_c   = call_c( pixel_c.name[i], pbuf1, 16, pbuf2+j*!align, 64 ); \
-                res_asm = call_a( pixel_asm.name[i], pbuf1, 16, pbuf2+j*!align, 64 ); \
+                res_c   = call_c( pixel_c.name[i],   pbuf1, (intptr_t)16, pbuf2+j*!align, (intptr_t)64 ); \
+                res_asm = call_a( pixel_asm.name[i], pbuf1, (intptr_t)16, pbuf2+j*!align, (intptr_t)64 ); \
                  if( res_c != res_asm ) \
                  { \
                      ok = 0; \
@@ -332,16 +348,16 @@ static int check_pixel( int cpu_ref, int cpu_new )
              for( int j = 0; j < 64; j++ ) \
              { \
                  pixel *pix2 = pbuf2+j; \
-                res_c[0] = pixel_c.sad[i]( pbuf1, 16, pix2, 64 ); \
+                res_c[0] = pixel_c.sad[i]( pbuf1, 16, pix2,   64 ); \
                  res_c[1] = pixel_c.sad[i]( pbuf1, 16, pix2+6, 64 ); \
                  res_c[2] = pixel_c.sad[i]( pbuf1, 16, pix2+1, 64 ); \
                  if( N == 4 ) \
                  { \
                      res_c[3] = pixel_c.sad[i]( pbuf1, 16, pix2+10, 64 ); \
-                    call_a( pixel_asm.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
+                    call_a( pixel_asm.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, (intptr_t)64, res_asm ); \
                  } \
                  else \
-                    call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
+                    call_a( pixel_asm.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
                  if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
                  { \
                      ok = 0; \
@@ -350,9 +366,9 @@ static int check_pixel( int cpu_ref, int cpu_new )
                               res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \
                  } \
                  if( N == 4 ) \
-                    call_c2( pixel_c.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, 64, res_asm ); \
+                    call_c2( pixel_c.sad_x4[i], pbuf1, pix2, pix2+6, pix2+1, pix2+10, (intptr_t)64, res_asm ); \
                  else \
-                    call_c2( pixel_c.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, 64, res_asm ); \
+                    call_c2( pixel_c.sad_x3[i], pbuf1, pix2, pix2+6, pix2+1, (intptr_t)64, res_asm ); \
              } \
          } \
      } \
@@ -367,8 +383,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
          set_func_name( "%s_%s", "var", pixel_names[i] ); \
          used_asm = 1; \
          /* abi-check wrapper can't return uint64_t, so separate it from return value check */ \
-        call_c1( pixel_c.var[i], pbuf1, 16 ); \
-        call_a1( pixel_asm.var[i], pbuf1, 16 ); \
+        call_c1( pixel_c.var[i],   pbuf1,           16 ); \
+        call_a1( pixel_asm.var[i], pbuf1, (intptr_t)16 ); \
          uint64_t res_c   = pixel_c.var[i]( pbuf1, 16 ); \
          uint64_t res_asm = pixel_asm.var[i]( pbuf1, 16 ); \
          if( res_c != res_asm ) \
@@ -376,8 +392,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              ok = 0; \
              fprintf( stderr, "var[%d]: %d %d != %d %d [FAILED]\n", i, (int)res_c, (int)(res_c>>32), (int)res_asm, (int)(res_asm>>32) ); \
          } \
-        call_c2( pixel_c.var[i], pbuf1, 16 ); \
-        call_a2( pixel_asm.var[i], pbuf1, 16 ); \
+        call_c2( pixel_c.var[i],   pbuf1, (intptr_t)16 ); \
+        call_a2( pixel_asm.var[i], pbuf1, (intptr_t)16 ); \
      }
  
      ok = 1; used_asm = 0;
@@ -392,8 +408,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
          int res_c, res_asm, ssd_c, ssd_asm; \
          set_func_name( "%s_%s", "var2", pixel_names[i] ); \
          used_asm = 1; \
-        res_c   = call_c( pixel_c.var2[i], pbuf1, 16, pbuf2, 16, &ssd_c ); \
-        res_asm = call_a( pixel_asm.var2[i], pbuf1, 16, pbuf2, 16, &ssd_asm ); \
+        res_c   = call_c( pixel_c.var2[i],   pbuf1, (intptr_t)16, pbuf2, (intptr_t)16, &ssd_c   ); \
+        res_asm = call_a( pixel_asm.var2[i], pbuf1, (intptr_t)16, pbuf2, (intptr_t)16, &ssd_asm ); \
          if( res_c != res_asm || ssd_c != ssd_asm ) \
          { \
              ok = 0; \
@@ -415,8 +431,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              for( int j = 0; j < 32; j++ )
              {
                  pixel *pix = (j&16 ? pbuf1 : pbuf3) + (j&15)*256;
-                call_c1( pixel_c.hadamard_ac[i], pbuf1, 16 );
-                call_a1( pixel_asm.hadamard_ac[i], pbuf1, 16 );
+                call_c1( pixel_c.hadamard_ac[i],   pbuf1, (intptr_t)16 );
+                call_a1( pixel_asm.hadamard_ac[i], pbuf1, (intptr_t)16 );
                  uint64_t rc = pixel_c.hadamard_ac[i]( pix, 16 );
                  uint64_t ra = pixel_asm.hadamard_ac[i]( pix, 16 );
                  if( rc != ra )
@@ -426,8 +442,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
                      break;
                  }
              }
-            call_c2( pixel_c.hadamard_ac[i], pbuf1, 16 );
-            call_a2( pixel_asm.hadamard_ac[i], pbuf1, 16 );
+            call_c2( pixel_c.hadamard_ac[i],   pbuf1, (intptr_t)16 );
+            call_a2( pixel_asm.hadamard_ac[i], pbuf1, (intptr_t)16 );
          }
      report( "pixel hadamard_ac :" );
  
@@ -446,8 +462,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              for( int j = 0; j < 2 && ok; j++ )
              {
                  pixel *p = j ? pbuf4 : pbuf1;
-                res_c   = call_c( pixel_c.vsad,   p, 16, h );
-                res_asm = call_a( pixel_asm.vsad, p, 16, h );
+                res_c   = call_c( pixel_c.vsad,   p, (intptr_t)16, h );
+                res_asm = call_a( pixel_asm.vsad, p, (intptr_t)16, h );
                  if( res_c != res_asm )
                  {
                      ok = 0;
@@ -627,8 +643,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              fprintf( stderr, "ssd_nv12: %"PRIu64",%"PRIu64" != %"PRIu64",%"PRIu64"\n",
                       res_u_c, res_v_c, res_u_a, res_v_a );
          }
-        call_c( pixel_c.ssd_nv12_core,   pbuf1, 368, pbuf2, 368, 360, 8, &res_u_c, &res_v_c );
-        call_a( pixel_asm.ssd_nv12_core, pbuf1, 368, pbuf2, 368, 360, 8, &res_u_a, &res_v_a );
+        call_c( pixel_c.ssd_nv12_core,   pbuf1, (intptr_t)368, pbuf2, (intptr_t)368, 360, 8, &res_u_c, &res_v_c );
+        call_a( pixel_asm.ssd_nv12_core, pbuf1, (intptr_t)368, pbuf2, (intptr_t)368, 360, 8, &res_u_a, &res_v_a );
      }
      report( "ssd_nv12 :" );
  
@@ -648,8 +664,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
              fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
          }
          set_func_name( "ssim_core" );
-        call_c2( pixel_c.ssim_4x4x2_core,   pbuf1+2, 32, pbuf2+2, 32, sums );
-        call_a2( pixel_asm.ssim_4x4x2_core, pbuf1+2, 32, pbuf2+2, 32, sums );
+        call_c2( pixel_c.ssim_4x4x2_core,   pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
+        call_a2( pixel_asm.ssim_4x4x2_core, pbuf1+2, (intptr_t)32, pbuf2+2, (intptr_t)32, sums );
          set_func_name( "ssim_end" );
          call_c2( pixel_c.ssim_end4,   sums, sums, 4 );
          call_a2( pixel_asm.ssim_end4, sums, sums, 4 );
@@ -1054,8 +1070,8 @@ static int check_mc( int cpu_ref, int cpu_new )
              used_asm = 1; \
              for( int i = 0; i < 1024; i++ ) \
                  pbuf3[i] = pbuf4[i] = 0xCD; \
-            call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
-            call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
+            call_c( mc_c.mc_luma, dst1, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \
+            call_a( mc_a.mc_luma, dst2, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \
              if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
              { \
                  fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
@@ -1065,15 +1081,15 @@ static int check_mc( int cpu_ref, int cpu_new )
          if( mc_a.get_ref != mc_ref.get_ref ) \
          { \
              pixel *ref = dst2; \
-            int ref_stride = 32; \
+            intptr_t ref_stride = 32; \
              int w_checked = ( ( sizeof(pixel) == 2 && (w == 12 || w == 20)) ? w-2 : w ); \
              const x264_weight_t *weight = x264_weight_none; \
              set_func_name( "get_ref_%dx%d", w_checked, h ); \
              used_asm = 1; \
              for( int i = 0; i < 1024; i++ ) \
                  pbuf3[i] = pbuf4[i] = 0xCD; \
-            call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
-            ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
+            call_c( mc_c.mc_luma, dst1, (intptr_t)32, src2, (intptr_t)64, dx, dy, w, h, weight ); \
+            ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, (intptr_t)64, dx, dy, w, h, weight ); \
              for( int i = 0; i < h; i++ ) \
                  if( memcmp( dst1+i*32, ref+i*ref_stride, w_checked * sizeof(pixel) ) ) \
                  { \
@@ -1090,14 +1106,14 @@ static int check_mc( int cpu_ref, int cpu_new )
              used_asm = 1; \
              for( int i = 0; i < 1024; i++ ) \
                  pbuf3[i] = pbuf4[i] = 0xCD; \
-            call_c( mc_c.mc_chroma, dst1, dst1+8, 16, src, 64, dx, dy, w, h ); \
-            call_a( mc_a.mc_chroma, dst2, dst2+8, 16, src, 64, dx, dy, w, h ); \
+            call_c( mc_c.mc_chroma, dst1, dst1+8, (intptr_t)16, src, (intptr_t)64, dx, dy, w, h ); \
+            call_a( mc_a.mc_chroma, dst2, dst2+8, (intptr_t)16, src, (intptr_t)64, dx, dy, w, h ); \
              /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
              for( int j = 0; j < h; j++ ) \
                  for( int i = w; i < 8; i++ ) \
                  { \
                      dst2[i+j*16+8] = dst1[i+j*16+8]; \
-                    dst2[i+j*16] = dst1[i+j*16]; \
+                    dst2[i+j*16  ] = dst1[i+j*16  ]; \
                  } \
              if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
              { \
@@ -1149,15 +1165,15 @@ static int check_mc( int cpu_ref, int cpu_new )
          { \
              set_func_name( "%s_%s", #name, pixel_names[i] ); \
              used_asm = 1; \
-            call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
-            call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
+            call_c1( mc_c.name[i], pbuf3, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \
+            call_a1( mc_a.name[i], pbuf4, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \
              if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
              { \
                  ok = 0; \
                  fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
              } \
-            call_c2( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
-            call_a2( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
+            call_c2( mc_c.name[i], pbuf3, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \
+            call_a2( mc_a.name[i], pbuf4, (intptr_t)16, pbuf2+1, (intptr_t)16, pbuf1+18, (intptr_t)16, weight ); \
          } \
      } \
  }
@@ -1185,9 +1201,9 @@ static int check_mc( int cpu_ref, int cpu_new )
          { \
              set_func_name( "%s_w%d", #name, j ); \
              used_asm = 1; \
-            call_c1( mc_c.weight[i], buffC, 32, pbuf2+align_off, 32, &weight, 16 ); \
+            call_c1( mc_c.weight[i],     buffC, (intptr_t)32, pbuf2+align_off, (intptr_t)32, &weight, 16 ); \
              mc_a.weight_cache(&ha, &weight); \
-            call_a1( weight.weightfn[i], buffA, 32, pbuf2+align_off, 32, &weight, 16 ); \
+            call_a1( weight.weightfn[i], buffA, (intptr_t)32, pbuf2+align_off, (intptr_t)32, &weight, 16 ); \
              for( int k = 0; k < 16; k++ ) \
                  if( memcmp( &buffC[k*32], &buffA[k*32], j * sizeof(pixel) ) ) \
                  { \
@@ -1195,8 +1211,8 @@ static int check_mc( int cpu_ref, int cpu_new )
                      fprintf( stderr, #name "[%d]: [FAILED] s:%d o:%d d%d\n", i, s, o, d ); \
                      break; \
                  } \
-            call_c2( mc_c.weight[i], buffC, 32, pbuf2+align_off, 32, &weight, 16 ); \
-            call_a2( weight.weightfn[i], buffA, 32, pbuf2+align_off, 32, &weight, 16 ); \
+            call_c2( mc_c.weight[i],     buffC, (intptr_t)32, pbuf2+align_off, (intptr_t)32, &weight, 16 ); \
+            call_a2( weight.weightfn[i], buffA, (intptr_t)32, pbuf2+align_off, (intptr_t)32, &weight, 16 ); \
          } \
      }
  
@@ -1248,8 +1264,8 @@ static int check_mc( int cpu_ref, int cpu_new )
              used_asm = 1;
              memset( pbuf3, 0, 64*height );
              memset( pbuf4, 0, 64*height );
-            call_c( mc_c.store_interleave_chroma, pbuf3, 64, pbuf1, pbuf1+16, height );
-            call_a( mc_a.store_interleave_chroma, pbuf4, 64, pbuf1, pbuf1+16, height );
+            call_c( mc_c.store_interleave_chroma, pbuf3, (intptr_t)64, pbuf1, pbuf1+16, height );
+            call_a( mc_a.store_interleave_chroma, pbuf4, (intptr_t)64, pbuf1, pbuf1+16, height );
              if( memcmp( pbuf3, pbuf4, 64*height ) )
              {
                  ok = 0;
@@ -1261,8 +1277,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          {
              set_func_name( "load_deinterleave_chroma_fenc" );
              used_asm = 1;
-            call_c( mc_c.load_deinterleave_chroma_fenc, pbuf3, pbuf1, 64, height );
-            call_a( mc_a.load_deinterleave_chroma_fenc, pbuf4, pbuf1, 64, height );
+            call_c( mc_c.load_deinterleave_chroma_fenc, pbuf3, pbuf1, (intptr_t)64, height );
+            call_a( mc_a.load_deinterleave_chroma_fenc, pbuf4, pbuf1, (intptr_t)64, height );
              if( memcmp( pbuf3, pbuf4, FENC_STRIDE*height ) )
              {
                  ok = 0;
@@ -1274,8 +1290,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          {
              set_func_name( "load_deinterleave_chroma_fdec" );
              used_asm = 1;
-            call_c( mc_c.load_deinterleave_chroma_fdec, pbuf3, pbuf1, 64, height );
-            call_a( mc_a.load_deinterleave_chroma_fdec, pbuf4, pbuf1, 64, height );
+            call_c( mc_c.load_deinterleave_chroma_fdec, pbuf3, pbuf1, (intptr_t)64, height );
+            call_a( mc_a.load_deinterleave_chroma_fdec, pbuf4, pbuf1, (intptr_t)64, height );
              if( memcmp( pbuf3, pbuf4, FDEC_STRIDE*height ) )
              {
                  ok = 0;
@@ -1298,8 +1314,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          {
              int w = plane_specs[i].w;
              int h = plane_specs[i].h;
-            int src_stride = plane_specs[i].src_stride;
-            int dst_stride = (w + 127) & ~63;
+            intptr_t src_stride = plane_specs[i].src_stride;
+            intptr_t dst_stride = (w + 127) & ~63;
              assert( dst_stride * h <= 0x1000 );
              pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
              memset( pbuf3, 0, 0x1000*sizeof(pixel) );
@@ -1310,7 +1326,7 @@ static int check_mc( int cpu_ref, int cpu_new )
                  if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) )
                  {
                      ok = 0;
-                    fprintf( stderr, "plane_copy FAILED: w=%d h=%d stride=%d\n", w, h, src_stride );
+                    fprintf( stderr, "plane_copy FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
                      break;
                  }
          }
@@ -1324,8 +1340,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          {
              int w = (plane_specs[i].w + 1) >> 1;
              int h = plane_specs[i].h;
-            int src_stride = (plane_specs[i].src_stride + 1) >> 1;
-            int dst_stride = (2*w + 127) & ~63;
+            intptr_t src_stride = (plane_specs[i].src_stride + 1) >> 1;
+            intptr_t dst_stride = (2*w + 127) & ~63;
              assert( dst_stride * h <= 0x1000 );
              pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
              memset( pbuf3, 0, 0x1000*sizeof(pixel) );
@@ -1336,7 +1352,7 @@ static int check_mc( int cpu_ref, int cpu_new )
                  if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) )
                  {
                      ok = 0;
-                    fprintf( stderr, "plane_copy_interleave FAILED: w=%d h=%d stride=%d\n", w, h, src_stride );
+                    fprintf( stderr, "plane_copy_interleave FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
                      break;
                  }
          }
@@ -1350,9 +1366,9 @@ static int check_mc( int cpu_ref, int cpu_new )
          {
              int w = (plane_specs[i].w + 1) >> 1;
              int h = plane_specs[i].h;
-            int dst_stride = w;
-            int src_stride = (2*w + 127) & ~63;
-            int offv = (dst_stride*h + 31) & ~15;
+            intptr_t dst_stride = w;
+            intptr_t src_stride = (2*w + 127) & ~63;
+            intptr_t offv = (dst_stride*h + 31) & ~15;
              memset( pbuf3, 0, 0x1000 );
              memset( pbuf4, 0, 0x1000 );
              call_c( mc_c.plane_copy_deinterleave, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf1, src_stride, w, h );
@@ -1362,7 +1378,7 @@ static int check_mc( int cpu_ref, int cpu_new )
                      memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w ) )
                  {
                      ok = 0;
-                    fprintf( stderr, "plane_copy_deinterleave FAILED: w=%d h=%d stride=%d\n", w, h, src_stride );
+                    fprintf( stderr, "plane_copy_deinterleave FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
                      break;
                  }
          }
@@ -1379,8 +1395,8 @@ static int check_mc( int cpu_ref, int cpu_new )
          ok = 1; used_asm = 1;
          memset( pbuf3, 0, 4096 * sizeof(pixel) );
          memset( pbuf4, 0, 4096 * sizeof(pixel) );
-        call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
-        call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
+        call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, (intptr_t)64, 48, 10, tmp );
+        call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, (intptr_t)64, 48, 10, tmp );
          for( int i = 0; i < 3; i++ )
              for( int j = 0; j < 10; j++ )
                  //FIXME ideally the first pixels would match too, but they aren't actually used
@@ -1407,9 +1423,9 @@ static int check_mc( int cpu_ref, int cpu_new )
          ok = 1; used_asm = 1;
          for( int w = 40; w <= 48; w += 8 )
          {
-            int stride = (w+8)&~15;
-            call_c( mc_c.frame_init_lowres_core, pbuf1, dstc[0], dstc[1], dstc[2], dstc[3], w*2, stride, w, 16 );
-            call_a( mc_a.frame_init_lowres_core, pbuf1, dsta[0], dsta[1], dsta[2], dsta[3], w*2, stride, w, 16 );
+            intptr_t stride = (w+8)&~15;
+            call_c( mc_c.frame_init_lowres_core, pbuf1, dstc[0], dstc[1], dstc[2], dstc[3], (intptr_t)w*2, stride, w, 16 );
+            call_a( mc_a.frame_init_lowres_core, pbuf1, dsta[0], dsta[1], dsta[2], dsta[3], (intptr_t)w*2, stride, w, 16 );
              for( int i = 0; i < 16; i++ )
              {
                  for( int j = 0; j < 4; j++ )
@@ -1433,7 +1449,7 @@ static int check_mc( int cpu_ref, int cpu_new )
  #define INTEGRAL_INIT( name, size, ... )\
      if( mc_a.name != mc_ref.name )\
      {\
-        int stride = 80;\
+        intptr_t stride = 80;\
          set_func_name( #name );\
          used_asm = 1;\
          memcpy( buf3, buf1, size*2*stride );\
@@ -1496,7 +1512,7 @@ static int check_mc( int cpu_ref, int cpu_new )
      {
          set_func_name( "memcpy_aligned" );
          ok = 1; used_asm = 1;
-        for( int size = 16; size < 256; size += 16 )
+        for( size_t size = 16; size < 256; size += 16 )
          {
              memset( buf4, 0xAA, size + 1 );
              call_c( mc_c.memcpy_aligned, buf3, buf1, size );
@@ -1504,7 +1520,7 @@ static int check_mc( int cpu_ref, int cpu_new )
              if( memcmp( buf3, buf4, size ) || buf4[size] != 0xAA )
              {
                  ok = 0;
-                fprintf( stderr, "memcpy_aligned FAILED: size=%d\n", size );
+                fprintf( stderr, "memcpy_aligned FAILED: size=%d\n", (int)size );
                  break;
              }
          }
@@ -1515,7 +1531,7 @@ static int check_mc( int cpu_ref, int cpu_new )
      {
          set_func_name( "memzero_aligned" );
          ok = 1; used_asm = 1;
-        for( int size = 128; size < 1024; size += 128 )
+        for( size_t size = 128; size < 1024; size += 128 )
          {
              memset( buf4, 0xAA, size + 1 );
              call_c( mc_c.memzero_aligned, buf3, size );
@@ -1523,7 +1539,7 @@ static int check_mc( int cpu_ref, int cpu_new )
              if( memcmp( buf3, buf4, size ) || buf4[size] != 0xAA )
              {
                  ok = 0;
-                fprintf( stderr, "memzero_aligned FAILED: size=%d\n", size );
+                fprintf( stderr, "memzero_aligned FAILED: size=%d\n", (int)size );
                  break;
              }
          }
@@ -1561,7 +1577,7 @@ static int check_deblock( int cpu_ref, int cpu_new )
  #define TEST_DEBLOCK( name, align, ... ) \
      for( int i = 0; i < 36; i++ ) \
      { \
-        int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
+        intptr_t off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
          for( int j = 0; j < 1024; j++ ) \
              /* two distributions of random to excersize different failure modes */ \
              pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
@@ -1570,16 +1586,16 @@ static int check_deblock( int cpu_ref, int cpu_new )
          { \
              set_func_name( #name ); \
              used_asm = 1; \
-            call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
-            call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_c1( db_c.name, pbuf3+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_a1( db_a.name, pbuf4+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \
              if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
              { \
                  ok = 0; \
                  fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
                  break; \
              } \
-            call_c2( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
-            call_a2( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_c2( db_c.name, pbuf3+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \
+            call_a2( db_a.name, pbuf4+off, (intptr_t)32, alphas[i], betas[i], ##__VA_ARGS__ ); \
          } \
      }
  
@@ -1935,11 +1951,11 @@ static int check_quant( int cpu_ref, int cpu_new )
              memcpy( dct1, buf1, size*sizeof(dctcoef) );
              memcpy( dct2, buf1, size*sizeof(dctcoef) );
              memcpy( buf3+256, buf3, 256 );
-            call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (udctcoef*)buf2, size );
+            call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3,       (udctcoef*)buf2, size );
              call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (udctcoef*)buf2, size );
              if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
                  ok = 0;
-            call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (udctcoef*)buf2, size );
+            call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3,       (udctcoef*)buf2, size );
              call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (udctcoef*)buf2, size );
          }
      }
author	Henrik Gramner <hengar-6@student.ltu.se>
	Wed, 1 Feb 2012 22:52:48 +0000 (23:52 +0100)
committer	Fiona Glaser <fiona@x264.com>
	Tue, 6 Mar 2012 18:37:53 +0000 (10:37 -0800)
common/arm/mc-a.S		patch \| blob \| history
common/arm/mc-c.c		patch \| blob \| history
common/arm/pixel.h		patch \| blob \| history
common/deblock.c		patch \| blob \| history
common/frame.c		patch \| blob \| history
common/frame.h		patch \| blob \| history
common/macroblock.c		patch \| blob \| history
common/mc.c		patch \| blob \| history
common/mc.h		patch \| blob \| history
common/pixel.c		patch \| blob \| history
common/pixel.h		patch \| blob \| history
common/ppc/deblock.c		patch \| blob \| history
common/ppc/mc.c		patch \| blob \| history
common/ppc/pixel.c		patch \| blob \| history
common/sparc/pixel.h		patch \| blob \| history
common/x86/deblock-a.asm		patch \| blob \| history
common/x86/mc-a.asm		patch \| blob \| history
common/x86/mc-a2.asm		patch \| blob \| history
common/x86/mc-c.c		patch \| blob \| history
common/x86/pixel-32.asm		patch \| blob \| history
common/x86/pixel-a.asm		patch \| blob \| history
common/x86/pixel.h		patch \| blob \| history
common/x86/quant-a.asm		patch \| blob \| history
common/x86/sad-a.asm		patch \| blob \| history
common/x86/sad16-a.asm		patch \| blob \| history
common/x86/x86inc.asm		patch \| blob \| history
encoder/analyse.c		patch \| blob \| history
encoder/me.c		patch \| blob \| history
encoder/slicetype.c		patch \| blob \| history
tools/checkasm-a.asm		patch \| blob \| history
tools/checkasm.c		patch \| blob \| history