Overhaul macroblock_cache_rect

author Fiona Glaser <fiona@x264.com>

Sun, 14 Mar 2010 08:19:59 +0000 (00:19 -0800)

committer Fiona Glaser <fiona@x264.com>

Sat, 27 Mar 2010 19:47:20 +0000 (12:47 -0700)
author Fiona Glaser <fiona@x264.com>
Sun, 14 Mar 2010 08:19:59 +0000 (00:19 -0800)
committer Fiona Glaser <fiona@x264.com>
Sat, 27 Mar 2010 19:47:20 +0000 (12:47 -0700)
diff --git a/Makefile b/Makefile

index f14549bd1edefc4ba31cb157da6d694eb3407354..4cd57752fe656f431daad2cd8546f12670c5e98b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ all: default
  
  SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
         common/frame.c common/dct.c common/cpu.c common/cabac.c \
-       common/common.c common/mdate.c common/set.c \
-       common/quant.c common/vlc.c \
+       common/common.c common/mdate.c common/rectangle.c \
+       common/set.c common/quant.c common/vlc.c \
         encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
         encoder/set.c encoder/macroblock.c encoder/cabac.c \
         encoder/cavlc.c encoder/encoder.c encoder/lookahead.c
diff --git a/common/common.h b/common/common.h

index 295b27d370838730d2c667ade00eb157a84fa774..af59286a04e021e844b5b24f1657dd3506649ebf 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -777,6 +777,7 @@ struct x264_t
  
  // included at the end because it needs x264_t
  #include "macroblock.h"
+#include "rectangle.h"
  
  #ifdef HAVE_MMX
  #include "x86/util.h"
diff --git a/common/macroblock.h b/common/macroblock.h

index a098a9ea70b220927fdc6d7da8f6dc6ede27875e..3e75c4c83443fb54fe2ee9d78fe4f32cdd5428a3 100644 (file)
--- a/common/macroblock.h
+++ b/common/macroblock.h
@@ -333,100 +333,7 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
     return (a&0xFFFF) + (b<<16);
  #endif
  }
-static ALWAYS_INLINE void x264_macroblock_cache_rect1( void *dst, int width, int height, uint8_t val )
-{
-    uint32_t *d = dst;
-    if( width == 4 )
-    {
-        uint32_t val2 = val * 0x01010101;
-                          M32( d+0 ) = val2;
-        if( height >= 2 ) M32( d+2 ) = val2;
-        if( height == 4 ) M32( d+4 ) = val2;
-        if( height == 4 ) M32( d+6 ) = val2;
-    }
-    else // 2
-    {
-        uint32_t val2 = val * 0x0101;
-                          M16( d+0 ) = val2;
-        if( height >= 2 ) M16( d+2 ) = val2;
-        if( height == 4 ) M16( d+4 ) = val2;
-        if( height == 4 ) M16( d+6 ) = val2;
-    }
-}
-static ALWAYS_INLINE void x264_macroblock_cache_rect2( void *dst, int width, int height, uint16_t val )
-{
-    uint16_t *d = dst;
-    uint32_t val32 = val + (val<<16);
-    uint64_t val64 = val32 + ((uint64_t)val32<<32);
-    if( width == 4 )
-    {
-                          M64( d+ 0 ) = val64;
-        if( height >= 2 ) M64( d+ 8 ) = val64;
-        if( height == 4 ) M64( d+16 ) = val64;
-        if( height == 4 ) M64( d+24 ) = val64;
-    }
-    else if( width == 2 )
-    {
-                          M32( d+ 0 ) = val32;
-        if( height >= 2 ) M32( d+ 8 ) = val32;
-        if( height == 4 ) M32( d+16 ) = val32;
-        if( height == 4 ) M32( d+24 ) = val32;
-    }
-    else //if( width == 1 )
-    {
-                          M16( d+ 0 ) = val;
-        if( height >= 2 ) M16( d+ 8 ) = val;
-        if( height == 4 ) M16( d+16 ) = val;
-        if( height == 4 ) M16( d+24 ) = val;
-    }
-}
-static ALWAYS_INLINE void x264_macroblock_cache_rect4( void *dst, int width, int height, uint32_t val )
-{
-    int dy;
-    if( width == 1 || WORD_SIZE < 8 )
-    {
-        uint32_t *d = dst;
-        for( dy = 0; dy < height; dy++ )
-        {
-                             M32( d+8*dy+0 ) = val;
-            if( width >= 2 ) M32( d+8*dy+1 ) = val;
-            if( width == 4 ) M32( d+8*dy+2 ) = val;
-            if( width == 4 ) M32( d+8*dy+3 ) = val;
-        }
-    }
-    else
-    {
-        uint64_t val64 = val + ((uint64_t)val<<32);
-        uint64_t *d = dst;
-        for( dy = 0; dy < height; dy++ )
-        {
-                             M64( d+4*dy+0 ) = val64;
-            if( width == 4 ) M64( d+4*dy+1 ) = val64;
-        }
-    }
-}
-#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
-static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
-{
-    x264_macroblock_cache_rect4( &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
-}
-static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mv )
-{
-    x264_macroblock_cache_rect2( &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y], width, height, mv );
-}
-static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
-{
-    x264_macroblock_cache_rect1( &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y], width, height, ref );
-}
-static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
-{
-    x264_macroblock_cache_rect1( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, b_skip );
-}
-static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
-{
-    int8_t *cache = &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y];
-    cache[0] = cache[1] = cache[8] = cache[9] = i_mode;
-}
+
  #define array_non_zero(a) array_non_zero_int(a, sizeof(a))
  #define array_non_zero_int array_non_zero_int
  static ALWAYS_INLINE int array_non_zero_int( int16_t *v, int i_count )
diff --git a/common/osdep.h b/common/osdep.h

index 5223ef5f463cdbbda1253f16eb4dd5f1df0fc763..a6a83f6b2cd6e6033627ebc3cbc9c7af10beb5e6 100644 (file)
--- a/common/osdep.h
+++ b/common/osdep.h
@@ -91,12 +91,14 @@
  #define NOINLINE __attribute__((noinline))
  #define MAY_ALIAS __attribute__((may_alias))
  #define x264_constant_p(x) __builtin_constant_p(x)
+#define x264_nonconstant_p(x) (!__builtin_constant_p(x))
  #else
  #define UNUSED
  #define ALWAYS_INLINE inline
  #define NOINLINE
  #define MAY_ALIAS
  #define x264_constant_p(x) 0
+#define x264_nonconstant_p(x) 0
  #endif
  
  /* threads */
diff --git a/common/rectangle.c b/common/rectangle.c

new file mode 100755 (executable)

index 0000000..389382f
--- /dev/null
+++ b/common/rectangle.c
@@ -0,0 +1,53 @@
+/*****************************************************************************
+ * rectangle.c: h264 encoder library
+ *****************************************************************************
+ * Copyright (C) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "common.h"
+
+#define CACHE_FUNC(name,size,width,height) /* emits one non-inline fill function for a width x height rect of size-byte entries */\
+void x264_macroblock_cache_##name##_##width##_##height( void *target, uint32_t val )\
+{\
+    x264_macroblock_cache_rect( target, width*size, height, size, val ); /* rect takes its width in bytes */\
+}
+
+#define CACHE_FUNCS(name,size) /* every generated rect size for one cache, plus the table used to look them up */\
+CACHE_FUNC(name,size,4,4)\
+CACHE_FUNC(name,size,2,4)\
+CACHE_FUNC(name,size,4,2)\
+CACHE_FUNC(name,size,2,2)\
+CACHE_FUNC(name,size,2,1)\
+CACHE_FUNC(name,size,1,2)\
+CACHE_FUNC(name,size,1,1)\
+void (*x264_cache_##name##_func_table[10])(void *, uint32_t) = /* slot = width + (height<<1) - 3, as computed by the callers in rectangle.h */\
+{\
+    x264_macroblock_cache_##name##_1_1, /* slot 0 */\
+    x264_macroblock_cache_##name##_2_1, /* slot 1 */\
+    x264_macroblock_cache_##name##_1_2, /* slot 2 */\
+    x264_macroblock_cache_##name##_2_2, /* slot 3 (a 4x1 request would alias this slot; no 4x1 function is generated) */\
+    NULL,                               /* slot 4: no width/height pair from {1,2,4} maps here */\
+    x264_macroblock_cache_##name##_4_2, /* slot 5 */\
+    NULL,                               /* slot 6: would be 1x4, for which no function is generated */\
+    x264_macroblock_cache_##name##_2_4, /* slot 7 */\
+    NULL,                               /* slot 8: no width/height pair from {1,2,4} maps here */\
+    x264_macroblock_cache_##name##_4_4  /* slot 9 */\
+};
+
+CACHE_FUNCS(mv, 4)  /* mv cache: 4-byte entries */
+CACHE_FUNCS(mvd, 2) /* mvd cache: 2-byte entries */
+CACHE_FUNCS(ref, 1) /* ref cache: 1-byte entries */
diff --git a/common/rectangle.h b/common/rectangle.h

new file mode 100755 (executable)

index 0000000..7b94455
--- /dev/null
+++ b/common/rectangle.h
@@ -0,0 +1,141 @@
+/*****************************************************************************
+ * rectangle.h: h264 encoder library
+ *****************************************************************************
+ * Copyright (C) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+/* Fill h rows of w bytes at dst with copies of the s-byte value v; rows are 8*s bytes apart. This function should only be called with constant w / h / s arguments! */
+static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
+{
+    uint8_t *d = dst;
+    uint16_t v2 = s == 2 ? v : v * 0x101; /* 2-byte fill pattern (only stored when w == 2) */
+    uint32_t v4 = s == 4 ? v : s == 2 ? v * 0x10001 : v * 0x1010101; /* 4-byte fill pattern */
+    uint64_t v8 = v4 + ((uint64_t)v4 << 32); /* 8-byte fill pattern */
+    s *= 8; /* from here on s is the byte offset between consecutive rows */
+
+    if( w == 2 )
+    {
+        M16( d+s*0 ) = v2;
+        if( h == 1 ) return;
+        M16( d+s*1 ) = v2;
+        if( h == 2 ) return;
+        M16( d+s*2 ) = v2; /* any h other than 1 or 2 ends up writing four rows */
+        M16( d+s*3 ) = v2;
+    }
+    else if( w == 4 )
+    {
+        M32( d+s*0 ) = v4;
+        if( h == 1 ) return;
+        M32( d+s*1 ) = v4;
+        if( h == 2 ) return;
+        M32( d+s*2 ) = v4; /* any h other than 1 or 2 ends up writing four rows */
+        M32( d+s*3 ) = v4;
+    }
+    else if( w == 8 )
+    {
+        if( WORD_SIZE == 8 ) /* one M64 store per row */
+        {
+            M64( d+s*0 ) = v8;
+            if( h == 1 ) return;
+            M64( d+s*1 ) = v8;
+            if( h == 2 ) return;
+            M64( d+s*2 ) = v8;
+            M64( d+s*3 ) = v8;
+        }
+        else /* two M32 stores per row, at byte offsets 0 and 4 */
+        {
+            M32( d+s*0+0 ) = v4;
+            M32( d+s*0+4 ) = v4;
+            if( h == 1 ) return;
+            M32( d+s*1+0 ) = v4;
+            M32( d+s*1+4 ) = v4;
+            if( h == 2 ) return;
+            M32( d+s*2+0 ) = v4;
+            M32( d+s*2+4 ) = v4;
+            M32( d+s*3+0 ) = v4;
+            M32( d+s*3+4 ) = v4;
+        }
+    }
+    else if( w == 16 )
+    {
+        /* height 1, width 16 doesn't occur */
+        assert( h != 1 );
+        if( WORD_SIZE == 8 )
+        {
+            do
+            {
+                M64( d+s*0+0 ) = v8;
+                M64( d+s*0+8 ) = v8;
+                M64( d+s*1+0 ) = v8;
+                M64( d+s*1+8 ) = v8;
+                h -= 2; /* two rows per pass, so h has to be even for the loop to stop at 0 */
+                d += s*2;
+            } while( h );
+        }
+        else
+        {
+            do
+            {
+                M32( d+ 0 ) = v4;
+                M32( d+ 4 ) = v4;
+                M32( d+ 8 ) = v4;
+                M32( d+12 ) = v4;
+                d += s; /* one row per pass */
+            } while( --h );
+        }
+    }
+    else
+        assert(0); /* only byte widths 2, 4, 8 and 16 are handled */
+}
+
+extern void (*x264_cache_mv_func_table[10])(void *, uint32_t); /* the three tables are defined in rectangle.c by CACHE_FUNCS */
+extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);
+extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);
+
+#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
+static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv ) /* fill a width x height rect of the mv cache for list i_list with mv */
+{
+    void *mv_cache = &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y]; /* first entry of the rect; rows are 8 entries apart */
+    if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
+        x264_cache_mv_func_table[width + (height<<1)-3]( mv_cache, mv ); /* size not a compile-time constant: call the function generated for it */
+    else
+        x264_macroblock_cache_rect( mv_cache, width*4, height, 4, mv ); /* constant size: expand the stores inline; width*4 converts entries to bytes */
+}
+static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mvd ) /* fill a width x height rect of the mvd cache for list i_list with mvd */
+{
+    void *mvd_cache = &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y]; /* first entry of the rect; rows are 8 entries apart */
+    if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
+        x264_cache_mvd_func_table[width + (height<<1)-3]( mvd_cache, mvd ); /* size not a compile-time constant: call the function generated for it */
+    else
+        x264_macroblock_cache_rect( mvd_cache, width*2, height, 2, mvd ); /* constant size: expand the stores inline; width*2 converts entries to bytes */
+}
+static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref ) /* fill a width x height rect of the ref cache for list i_list with ref */
+{
+    void *ref_cache = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y]; /* first entry of the rect; rows are 8 entries apart */
+    if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
+        x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, ref ); /* size not a compile-time constant: call the function generated for it */
+    else
+        x264_macroblock_cache_rect( ref_cache, width, height, 1, ref ); /* constant size: expand the stores inline; 1-byte entries, so width is already in bytes */
+}
+static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip ) /* fill a width x height rect of the skip cache with b_skip */
+{
+    x264_macroblock_cache_rect( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, 1, b_skip ); /* no function-table path here: rect is always expanded inline, with 1-byte entries */
+}
+static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode ) /* store i_mode into a 2x2 group of intra4x4_pred_mode cache entries */
+{
+    x264_macroblock_cache_rect( &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y], 2, 2, 1, i_mode ); /* constant 2 bytes x 2 rows, 1-byte entries */
+}
diff --git a/encoder/me.c b/encoder/me.c

index 5f29a640d5be513b65f0ed5a7f4484e5b842e105..686ed85960c42f3dd82ae16071489846e88a9528 100644 (file)
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -1087,6 +1087,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
      int satd;
      int dir = -2;
      int i8 = i4>>2;
+    uint16_t amvd;
  
      uint8_t *pix  = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
      uint8_t *pixu = &h->mb.pic.p_fdec[1][(i8>>1)*4*FDEC_STRIDE+(i8&1)*4];
@@ -1174,7 +1175,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
      m->mv[0] = bmx;
      m->mv[1] = bmy;
      x264_macroblock_cache_mv ( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, pack16to32_mask(bmx, bmy) );
-    uint16_t amvd = pack8to16(X264_MIN(abs(bmx - m->mvp[0]),33), X264_MIN(abs(bmy - m->mvp[1]),33));
+    amvd = pack8to16( X264_MIN(abs(bmx - m->mvp[0]),33), X264_MIN(abs(bmy - m->mvp[1]),33) );
      x264_macroblock_cache_mvd( h, block_idx_x[i4], block_idx_y[i4], bw>>2, bh>>2, i_list, amvd );
      h->mb.b_skip_mc = 0;
  }
author	Fiona Glaser <fiona@x264.com>
	Sun, 14 Mar 2010 08:19:59 +0000 (00:19 -0800)
committer	Fiona Glaser <fiona@x264.com>
	Sat, 27 Mar 2010 19:47:20 +0000 (12:47 -0700)
Makefile		patch \| blob \| history
common/common.h		patch \| blob \| history
common/macroblock.h		patch \| blob \| history
common/osdep.h		patch \| blob \| history
common/rectangle.c	[new file with mode: 0755]	patch \| blob
common/rectangle.h	[new file with mode: 0755]	patch \| blob
encoder/me.c		patch \| blob \| history