From dd354db4db2f26e63ed36eb790052c6794e5a684 Mon Sep 17 00:00:00 2001
From: Loren Merritt <pengvado@akuvian.org>
Date: Fri, 6 Jan 2012 15:53:04 +0000
Subject: [PATCH] CABAC trellis opts part 3: make some arrays non-static

---
 common/dct.c         | 64 +++++++++++++++++++++++++++++++++++++++++++
 common/dct.h         | 65 +++-----------------------------------------
 encoder/cabac.c      | 54 ++++++++++++++++++++----------------
 encoder/macroblock.c |  2 +-
 encoder/rdo.c        | 34 +++++++++++------------
 5 files changed, 116 insertions(+), 103 deletions(-)

diff --git a/common/dct.c b/common/dct.c
index 92b70a18..ac078a12 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -36,6 +36,70 @@
 #   include "arm/dct.h"
 #endif
 
+/* The inverse of the scaling factors introduced by the 8x8 fdct; every factor is passed through FIX8(). */
+/* The element type is uint32_t for the asm implementation of trellis; the actual values fit in uint16_t. */
+#define W(i) (i==0 ? FIX8(1.0000) :\
+              i==1 ? FIX8(0.8859) :\
+              i==2 ? FIX8(1.6000) :\
+              i==3 ? FIX8(0.9415) :\
+              i==4 ? FIX8(1.2651) :\
+              i==5 ? FIX8(1.1910) :0)
+const uint32_t x264_dct8_weight_tab[64] = { /* 8 rows of 8; the same 4x4 pattern of W() indices fills each quadrant */
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
+};
+#undef W /* W() is local to this table; the next table defines its own */
+
+#define W(i) (i==0 ? FIX8(1.76777) :\
+              i==1 ? FIX8(1.11803) :\
+              i==2 ? FIX8(0.70711) :0)
+const uint32_t x264_dct4_weight_tab[16] = { /* 4 rows of 4; presumably the 4x4 counterpart of x264_dct8_weight_tab (TODO confirm) */
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2),
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2)
+};
+#undef W /* W() is local to this table; the next table defines its own */
+
+/* inverse squared: each factor is the square of the matching x264_dct4_weight_tab factor (e.g. 1.76777^2 ~= 3.125) */
+#define W(i) (i==0 ? FIX8(3.125) :\
+              i==1 ? FIX8(1.25) :\
+              i==2 ? FIX8(0.5) :0)
+const uint32_t x264_dct4_weight2_tab[16] = { /* same W() index layout as x264_dct4_weight_tab */
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2),
+    W(0), W(1), W(0), W(1),
+    W(1), W(2), W(1), W(2)
+};
+#undef W /* W() is local to this table; the next table defines its own */
+
+#define W(i) (i==0 ? FIX8(1.00000) :\
+              i==1 ? FIX8(0.78487) :\
+              i==2 ? FIX8(2.56132) :\
+              i==3 ? FIX8(0.88637) :\
+              i==4 ? FIX8(1.60040) :\
+              i==5 ? FIX8(1.41850) :0)
+const uint32_t x264_dct8_weight2_tab[64] = { /* factors are approximately the squares of the x264_dct8_weight_tab factors; same index layout */
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+
+    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
+    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
+    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
+};
+#undef W /* W() was only a helper for building the table above */
+
+
 static void dct4x4dc( dctcoef d[16] )
 {
     dctcoef tmp[16];
diff --git a/common/dct.h b/common/dct.h
index 0a6934b5..55c0624c 100644
--- a/common/dct.h
+++ b/common/dct.h
@@ -26,67 +26,10 @@
 #ifndef X264_DCT_H
 #define X264_DCT_H
 
-/* the inverse of the scaling factors introduced by 8x8 fdct */
-#define W(i) (i==0 ? FIX8(1.0000) :\
-              i==1 ? FIX8(0.8859) :\
-              i==2 ? FIX8(1.6000) :\
-              i==3 ? FIX8(0.9415) :\
-              i==4 ? FIX8(1.2651) :\
-              i==5 ? FIX8(1.1910) :0)
-static const uint16_t x264_dct8_weight_tab[64] = {
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.76777) :\
-              i==1 ? FIX8(1.11803) :\
-              i==2 ? FIX8(0.70711) :0)
-static const uint16_t x264_dct4_weight_tab[16] = {
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2),
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2)
-};
-#undef W
-
-/* inverse squared */
-#define W(i) (i==0 ? FIX8(3.125) :\
-              i==1 ? FIX8(1.25) :\
-              i==2 ? FIX8(0.5) :0)
-static const uint16_t x264_dct4_weight2_tab[16] = {
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2),
-    W(0), W(1), W(0), W(1),
-    W(1), W(2), W(1), W(2)
-};
-#undef W
-
-#define W(i) (i==0 ? FIX8(1.00000) :\
-              i==1 ? FIX8(0.78487) :\
-              i==2 ? FIX8(2.56132) :\
-              i==3 ? FIX8(0.88637) :\
-              i==4 ? FIX8(1.60040) :\
-              i==5 ? FIX8(1.41850) :0)
-static const uint16_t x264_dct8_weight2_tab[64] = {
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-
-    W(0), W(3), W(4), W(3),  W(0), W(3), W(4), W(3),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1),
-    W(4), W(5), W(2), W(5),  W(4), W(5), W(2), W(5),
-    W(3), W(1), W(5), W(1),  W(3), W(1), W(5), W(1)
-};
-#undef W
+extern const uint32_t x264_dct4_weight_tab[16]; /* 16-entry weight table; defined in common/dct.c */
+extern const uint32_t x264_dct8_weight_tab[64]; /* 64-entry weight table; defined in common/dct.c */
+extern const uint32_t x264_dct4_weight2_tab[16]; /* 16-entry squared-weight table; defined in common/dct.c */
+extern const uint32_t x264_dct8_weight2_tab[64]; /* 64-entry squared-weight table; defined in common/dct.c */
 
 typedef struct
 {
diff --git a/encoder/cabac.c b/encoder/cabac.c
index 67483e44..985546f9 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -294,8 +294,8 @@ static ALWAYS_INLINE int x264_cabac_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_
         x264_cabac_encode_decision( cb, ctxbase + 5, 1 );
         if( i_abs < 9 )
         {
-            cb->f8_bits_encoded += cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
-            cb->state[ctxbase+6] = cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
+            cb->f8_bits_encoded += x264_cabac_size_unary[i_abs - 3][cb->state[ctxbase+6]];
+            cb->state[ctxbase+6] = x264_cabac_transition_unary[i_abs - 3][cb->state[ctxbase+6]];
         }
         else
         {
@@ -658,7 +658,12 @@ static const uint16_t coeff_abs_level_m1_offset[14] =
 {
     227+0, 227+10, 227+20, 227+30, 227+39, 426, 952+0, 952+10, 952+20, 708, 982+0, 982+10, 982+20, 766
 };
-static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
+#if RDO_SKIP_BS
+extern const uint8_t x264_significant_coeff_flag_offset_8x8[2][63];
+extern const uint8_t x264_last_coeff_flag_offset_8x8[63];
+extern const uint8_t x264_coeff_flag_offset_chroma_422_dc[7];
+#else
+const uint8_t x264_significant_coeff_flag_offset_8x8[2][63] =
 {{
     0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
     4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
@@ -670,14 +675,15 @@ static const uint8_t significant_coeff_flag_offset_8x8[2][63] =
     9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
     9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14
 }};
-static const uint8_t last_coeff_flag_offset_8x8[63] =
+const uint8_t x264_last_coeff_flag_offset_8x8[63] =
 {
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
 };
-static const uint8_t coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
+const uint8_t x264_coeff_flag_offset_chroma_422_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; /* MIN( i/2, 2 ) */
+#endif
 
 // node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
 //           4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
@@ -737,15 +743,15 @@ static ALWAYS_INLINE void x264_cabac_block_residual_internal( x264_t *h, x264_ca
     if( chroma422dc )
     {
         int count_m1 = 7;
-        WRITE_SIGMAP( coeff_flag_offset_chroma_422_dc[i], coeff_flag_offset_chroma_422_dc[i] )
+        WRITE_SIGMAP( x264_coeff_flag_offset_chroma_422_dc[i], x264_coeff_flag_offset_chroma_422_dc[i] )
     }
     else
     {
         int count_m1 = count_cat_m1[ctx_block_cat];
         if( count_m1 == 63 )
         {
-            const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
-            WRITE_SIGMAP( sig_offset[i], last_coeff_flag_offset_8x8[i] )
+            const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
+            WRITE_SIGMAP( sig_offset[i], x264_last_coeff_flag_offset_8x8[i] )
         }
         else
             WRITE_SIGMAP( i, i )
@@ -799,7 +805,7 @@ static void x264_cabac_block_residual_422_dc( x264_t *h, x264_cabac_t *cb, int c
  * is nearly no quality penalty for this (~0.001db) and the speed boost (~30%) is worth it. */
 static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l, int b_8x8, int chroma422dc )
 {
-    const uint8_t *sig_offset = significant_coeff_flag_offset_8x8[MB_INTERLACED];
+    const uint8_t *sig_offset = x264_significant_coeff_flag_offset_8x8[MB_INTERLACED];
     int ctx_sig = significant_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
     int ctx_last = last_coeff_flag_offset[MB_INTERLACED][ctx_block_cat];
     int ctx_level = coeff_abs_level_m1_offset[ctx_block_cat];
@@ -812,9 +818,9 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
     if( last != (b_8x8 ? 63 : chroma422dc ? 7 : count_cat_m1[ctx_block_cat]) )
     {
         x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[last] :
-                                    chroma422dc ? coeff_flag_offset_chroma_422_dc[last] : last), 1 );
-        x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[last] :
-                                    chroma422dc ? coeff_flag_offset_chroma_422_dc[last] : last), 1 );
+                                    chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
+        x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[last] :
+                                    chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[last] : last), 1 );
     }
 
     if( coeff_abs > 1 )
@@ -823,13 +829,13 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
         ctx = levelgt1_ctx[0] + ctx_level;
         if( coeff_abs < 15 )
         {
-            cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
-            cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
+            cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+            cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
         }
         else
         {
-            cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
-            cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
+            cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
+            cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
             x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
         }
         node_ctx = coeff_abs_level_transition[1][0];
@@ -847,9 +853,9 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
         {
             coeff_abs = abs(l[i]);
             x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
-                                        chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 1 );
-            x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? last_coeff_flag_offset_8x8[i] :
-                                        chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 0 );
+                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 1 );
+            x264_cabac_encode_decision( cb, ctx_last + (b_8x8 ? x264_last_coeff_flag_offset_8x8[i] :
+                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
             ctx = coeff_abs_level1_ctx[node_ctx] + ctx_level;
 
             if( coeff_abs > 1 )
@@ -858,13 +864,13 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
                 ctx = levelgt1_ctx[node_ctx] + ctx_level;
                 if( coeff_abs < 15 )
                 {
-                    cb->f8_bits_encoded += cabac_size_unary[coeff_abs-1][cb->state[ctx]];
-                    cb->state[ctx] = cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
+                    cb->f8_bits_encoded += x264_cabac_size_unary[coeff_abs-1][cb->state[ctx]];
+                    cb->state[ctx] = x264_cabac_transition_unary[coeff_abs-1][cb->state[ctx]];
                 }
                 else
                 {
-                    cb->f8_bits_encoded += cabac_size_unary[14][cb->state[ctx]];
-                    cb->state[ctx] = cabac_transition_unary[14][cb->state[ctx]];
+                    cb->f8_bits_encoded += x264_cabac_size_unary[14][cb->state[ctx]];
+                    cb->state[ctx] = x264_cabac_transition_unary[14][cb->state[ctx]];
                     x264_cabac_encode_ue_bypass( cb, 0, coeff_abs - 15 );
                 }
                 node_ctx = coeff_abs_level_transition[1][node_ctx];
@@ -878,7 +884,7 @@ static void ALWAYS_INLINE x264_cabac_block_residual_internal( x264_t *h, x264_ca
         }
         else
             x264_cabac_encode_decision( cb, ctx_sig + (b_8x8 ? sig_offset[i] :
-                                        chroma422dc ? coeff_flag_offset_chroma_422_dc[i] : i), 0 );
+                                        chroma422dc ? x264_coeff_flag_offset_chroma_422_dc[i] : i), 0 );
     }
 }
 
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 28c4d02d..f8eb45fd 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -1090,7 +1090,7 @@ void x264_noise_reduction_update( x264_t *h )
     {
         int dct8x8 = cat&1;
         int size = dct8x8 ? 64 : 16;
-        const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+        const uint32_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
 
         if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
         {
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 9a930b1a..b1495d42 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -32,8 +32,8 @@
 
 /* Transition and size tables for abs<9 MVD and residual coding */
 /* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */
-static uint8_t cabac_transition_unary[15][128];
-static uint16_t cabac_size_unary[15][128];
+uint8_t x264_cabac_transition_unary[15][128]; /* indexed [i_prefix][i_ctx]; no longer static; written in x264_rdo_init() */
+uint16_t x264_cabac_size_unary[15][128]; /* indexed [i_prefix][i_ctx]; no longer static; written in x264_rdo_init() */
 /* Transition and size tables for abs>9 MVD */
 /* Consist of 5 1s and a bypass sign bit */
 static uint8_t cabac_transition_5ones[128];
@@ -386,8 +386,8 @@ void x264_rdo_init( void )
                 f8_bits += x264_cabac_size_decision2( &ctx, 0 );
             f8_bits += 1 << CABAC_SIZE_BITS; //sign
 
-            cabac_size_unary[i_prefix][i_ctx] = f8_bits;
-            cabac_transition_unary[i_prefix][i_ctx] = ctx;
+            x264_cabac_size_unary[i_prefix][i_ctx] = f8_bits;
+            x264_cabac_transition_unary[i_prefix][i_ctx] = ctx;
         }
     }
     for( int i_ctx = 0; i_ctx < 128; i_ctx++ )
@@ -469,7 +469,7 @@ int trellis_dc_shortcut( int sign_coef, int quant_coef, int unquant_mf, int coef
             unsigned f8_bits = cost_sig;
             int prefix = X264_MIN( abs_level - 1, 14 );
             f8_bits += x264_cabac_size_decision_noup2( cabac_state+1, prefix > 0 );
-            f8_bits += cabac_size_unary[prefix][cabac_state[5]];
+            f8_bits += x264_cabac_size_unary[prefix][cabac_state[5]];
             if( abs_level >= 15 )
                 f8_bits += bs_size_ue_big( abs_level - 15 ) << CABAC_SIZE_BITS;
             score += (uint64_t)f8_bits * lambda2 >> ( CABAC_SIZE_BITS - LAMBDA_BITS );
@@ -496,7 +496,7 @@ int trellis_coef( int j, int const_level, int abs_level, int prefix, int suffix_
     if( const_level > 1 )
     {
         levelgt1_state = j >= 6 ? nodes_prev[j].cabac_state[levelgt1_ctx-6] : level_state[levelgt1_ctx];
-        f8_bits += cabac_size_unary[prefix][levelgt1_state] + suffix_cost;
+        f8_bits += x264_cabac_size_unary[prefix][levelgt1_state] + suffix_cost;
     }
     else
         f8_bits += 1 << CABAC_SIZE_BITS;
@@ -513,7 +513,7 @@ int trellis_coef( int j, int const_level, int abs_level, int prefix, int suffix_
         if( j >= 3 ) // skip the transition if we're not going to reuse the context
             nodes_cur[node_ctx].cabac_state[level1_ctx>>2] = x264_cabac_transition[level1_state][const_level > 1];
         if( const_level > 1 && node_ctx == 7 )
-            nodes_cur[node_ctx].cabac_state[levelgt1_ctx-6] = cabac_transition_unary[prefix][levelgt1_state];
+            nodes_cur[node_ctx].cabac_state[levelgt1_ctx-6] = x264_cabac_transition_unary[prefix][levelgt1_state];
         nodes_cur[node_ctx].level_idx = nodes_prev[j].level_idx;
         SET_LEVEL( nodes_cur[node_ctx], nodes_prev[j], abs_level );
     }
@@ -636,8 +636,8 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
 {
     ALIGNED_ARRAY_16( dctcoef, orig_coefs, [64] );
     ALIGNED_ARRAY_16( dctcoef, quant_coefs, [64] );
-    const uint16_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
-    const uint16_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+    const uint32_t *coef_weight1 = num_coefs == 64 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
+    const uint32_t *coef_weight2 = num_coefs == 64 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
     const int b_interlaced = MB_INTERLACED;
     uint8_t *cabac_state_sig = &h->cabac.state[ significant_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
     uint8_t *cabac_state_last = &h->cabac.state[ last_coeff_flag_offset[b_interlaced][ctx_block_cat] ];
@@ -738,8 +738,8 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
              * subtracting from one score is equivalent to adding to the rest. */\
             if( !ctx_hi )\
             {\
-                int sigindex = !dc && num_coefs == 64 ? significant_coeff_flag_offset_8x8[b_interlaced][i] :\
-                               b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
+                int sigindex = !dc && num_coefs == 64 ? x264_significant_coeff_flag_offset_8x8[b_interlaced][i] :\
+                               b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
                 uint64_t cost_sig0 = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 0 )\
                                    * (uint64_t)lambda2 >> ( CABAC_SIZE_BITS - LAMBDA_BITS );\
                 nodes_cur[0].score -= cost_sig0;\
@@ -759,10 +759,10 @@ int quant_trellis_cabac( x264_t *h, dctcoef *dct,
 \
         if( i < num_coefs-1 || ctx_hi )\
         {\
-            int sigindex  = !dc && num_coefs == 64 ? significant_coeff_flag_offset_8x8[b_interlaced][i] :\
-                            b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
-            int lastindex = !dc && num_coefs == 64 ? last_coeff_flag_offset_8x8[i] :\
-                            b_chroma && dc && num_coefs == 8 ? coeff_flag_offset_chroma_422_dc[i] : i;\
+            int sigindex  = !dc && num_coefs == 64 ? x264_significant_coeff_flag_offset_8x8[b_interlaced][i] :\
+                            b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
+            int lastindex = !dc && num_coefs == 64 ? x264_last_coeff_flag_offset_8x8[i] :\
+                            b_chroma && dc && num_coefs == 8 ? x264_coeff_flag_offset_chroma_422_dc[i] : i;\
             cost_siglast[0] = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 0 );\
             int cost_sig1   = x264_cabac_size_decision_noup2( &cabac_state_sig[sigindex], 1 );\
             cost_siglast[1] = x264_cabac_size_decision_noup2( &cabac_state_last[lastindex], 0 ) + cost_sig1;\
@@ -890,8 +890,8 @@ int quant_trellis_cavlc( x264_t *h, dctcoef *dct,
 {
     ALIGNED_16( dctcoef quant_coefs[2][16] );
     ALIGNED_16( dctcoef coefs[16] ) = {0};
-    const uint16_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
-    const uint16_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
+    const uint32_t *coef_weight1 = b_8x8 ? x264_dct8_weight_tab : x264_dct4_weight_tab;
+    const uint32_t *coef_weight2 = b_8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
     int delta_distortion[16];
     int64_t score = 1ULL<<62;
     int i, j;
-- 
2.39.2