]> git.sesse.net Git - x264/blobdiff - common/bitstream.c
arm: Add x264_nal_escape_neon
[x264] / common / bitstream.c
index c056ffbd76b2411392538ac4a31c6e11bf8ccbd1..ec9836ac019930198f53242c69ddcbd7eb66e83b 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * bitstream.c: bitstream writing
  *****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2015 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Fiona Glaser <fiona@x264.com>
@@ -39,11 +39,22 @@ static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
     return dst;
 }
 
-#if HAVE_MMX
 uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end );
 uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
-uint8_t *x264_nal_escape_avx( uint8_t *dst, uint8_t *src, uint8_t *end );
-#endif
+uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end );
+void x264_cabac_block_residual_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
 
 /****************************************************************************
  * x264_nal_encode:
@@ -88,13 +99,57 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
 
 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
 {
+    memset( pf, 0, sizeof(*pf) );
+
     pf->nal_escape = x264_nal_escape_c;
 #if HAVE_MMX
+#if ARCH_X86_64
+    pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
+    pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
+    pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
+#endif
+
     if( cpu&X264_CPU_MMX2 )
         pf->nal_escape = x264_nal_escape_mmx2;
-    if( (cpu&X264_CPU_SSE2) && (cpu&X264_CPU_SSE2_IS_FAST) )
-        pf->nal_escape = x264_nal_escape_sse2;
-    if( cpu&X264_CPU_AVX )
-        pf->nal_escape = x264_nal_escape_avx;
+    if( cpu&X264_CPU_SSE2 )
+    {
+#if ARCH_X86_64
+        if( cpu&X264_CPU_LZCNT )
+        {
+            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2_lzcnt;
+            pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2_lzcnt;
+            pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2_lzcnt;
+        }
+#endif
+        if( cpu&X264_CPU_SSE2_IS_FAST )
+            pf->nal_escape = x264_nal_escape_sse2;
+    }
+#if ARCH_X86_64
+    if( cpu&X264_CPU_SSSE3 )
+    {
+        pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3;
+        pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_ssse3;
+        if( cpu&X264_CPU_LZCNT )
+        {
+            pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3_lzcnt;
+            pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt;
+        }
+    }
+
+    if( cpu&X264_CPU_AVX2 )
+    {
+        pf->nal_escape = x264_nal_escape_avx2;
+        if( cpu&X264_CPU_BMI2 )
+            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_avx2_bmi2;
+    }
+#endif
+#endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
+#if ARCH_AARCH64
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
 #endif
 }