]> git.sesse.net Git - x264/blobdiff - common/quant.c
Faster mbtree propagate and x264_log2, less memory usage
[x264] / common / quant.c
index ac798a25a8d58f901245079623b29845bf42e4ed..263fb7c1529cbbec42a34937290b86e4eca79c06 100644 (file)
@@ -29,6 +29,9 @@
 #ifdef ARCH_PPC
 #   include "ppc/quant.h"
 #endif
+#ifdef ARCH_ARM
+#   include "arm/quant.h"
+#endif
 
 #define QUANT_ONE( coef, mf, f ) \
 { \
         (coef) = (f + (coef)) * (mf) >> 16; \
     else \
         (coef) = - ((f - (coef)) * (mf) >> 16); \
+    nz |= (coef); \
 }
 
-static void quant_8x8( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] )
+static int quant_8x8( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] )
 {
-    int i;
+    int i, nz = 0;
     for( i = 0; i < 64; i++ )
         QUANT_ONE( dct[0][i], mf[i], bias[i] );
+    return !!nz;
 }
 
-static void quant_4x4( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
+static int quant_4x4( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
 {
-    int i;
+    int i, nz = 0;
     for( i = 0; i < 16; i++ )
         QUANT_ONE( dct[0][i], mf[i], bias[i] );
+    return !!nz;
 }
 
-static void quant_4x4_dc( int16_t dct[4][4], int mf, int bias )
+static int quant_4x4_dc( int16_t dct[4][4], int mf, int bias )
 {
-    int i;
+    int i, nz = 0;
     for( i = 0; i < 16; i++ )
         QUANT_ONE( dct[0][i], mf, bias );
+    return !!nz;
 }
 
-static void quant_2x2_dc( int16_t dct[2][2], int mf, int bias )
+static int quant_2x2_dc( int16_t dct[2][2], int mf, int bias )
 {
+    int nz = 0;
     QUANT_ONE( dct[0][0], mf, bias );
     QUANT_ONE( dct[0][1], mf, bias );
     QUANT_ONE( dct[0][2], mf, bias );
     QUANT_ONE( dct[0][3], mf, bias );
+    return !!nz;
 }
 
 #define DEQUANT_SHL( x ) \
@@ -402,6 +411,13 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->decimate_score16 = x264_decimate_score16_ssse3;
         pf->decimate_score64 = x264_decimate_score64_ssse3;
     }
+
+    if( cpu&X264_CPU_SSE4 )
+    {
+        pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
+        pf->quant_4x4 = x264_quant_4x4_sse4;
+        pf->quant_8x8 = x264_quant_8x8_sse4;
+    }
 #endif // HAVE_MMX
 
 #ifdef ARCH_PPC
@@ -415,6 +431,25 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
         pf->dequant_8x8 = x264_dequant_8x8_altivec;
     }
 #endif
+
+#ifdef HAVE_ARMV6
+    if( cpu&X264_CPU_ARMV6 )
+        pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4_arm;
+
+    if( cpu&X264_CPU_NEON )
+    {
+        pf->quant_2x2_dc   = x264_quant_2x2_dc_neon;
+        pf->quant_4x4      = x264_quant_4x4_neon;
+        pf->quant_4x4_dc   = x264_quant_4x4_dc_neon;
+        pf->quant_8x8      = x264_quant_8x8_neon;
+        pf->dequant_4x4    = x264_dequant_4x4_neon;
+        pf->dequant_4x4_dc = x264_dequant_4x4_dc_neon;
+        pf->dequant_8x8    = x264_dequant_8x8_neon;
+        pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_neon;
+        pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_neon;
+        pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
+    }
+#endif
     pf->coeff_last[  DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
     pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
     pf->coeff_level_run[  DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];