git.sesse.net Git - x264/blobdiff - common/bitstream.c
x86: Add asm for mbtree fixed point conversion
[x264] / common / bitstream.c
index e094c261dfb6227574381b1e9076b013de7a72f1..f1a49968dfa21eb9404bbad904dc08933d6f38dd 100644 (file)
@@ -1,7 +1,7 @@
 /*****************************************************************************
- * bitstream.c: h264 encoder library
+ * bitstream.c: bitstream writing
  *****************************************************************************
- * Copyright (C) 2010 x264 project
+ * Copyright (C) 2003-2016 x264 project
  *
  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  *          Fiona Glaser <fiona@x264.com>
@@ -19,6 +19,9 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
  *****************************************************************************/
 
 #include "common.h"
@@ -36,15 +39,27 @@ static uint8_t *x264_nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end )
     return dst;
 }
 
-#ifdef HAVE_MMX
-uint8_t *x264_nal_escape_mmxext( uint8_t *dst, uint8_t *src, uint8_t *end );
+uint8_t *x264_nal_escape_mmx2( uint8_t *dst, uint8_t *src, uint8_t *end );
 uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
-#endif
+uint8_t *x264_nal_escape_avx2( uint8_t *dst, uint8_t *src, uint8_t *end );
+void x264_cabac_block_residual_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_ssse3      ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_sse2       ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
+
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
 
 /****************************************************************************
  * x264_nal_encode:
  ****************************************************************************/
-int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode )
+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
 {
     uint8_t *src = nal->p_payload;
     uint8_t *end = nal->p_payload + nal->i_payload;
@@ -52,7 +67,7 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
 
     if( h->param.b_annexb )
     {
-        if( b_long_startcode )
+        if( nal->b_long_startcode )
             *dst++ = 0x00;
         *dst++ = 0x00;
         *dst++ = 0x00;
@@ -65,28 +80,89 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
     *dst++ = ( 0x00 << 7 ) | ( nal->i_ref_idc << 5 ) | nal->i_type;
 
     dst = h->bsf.nal_escape( dst, src, end );
-    int size = (dst - orig_dst) - 4;
+    int size = dst - orig_dst;
+
+    /* Apply AVC-Intra padding */
+    if( h->param.i_avcintra_class )
+    {
+        int padding = nal->i_payload + nal->i_padding + NALU_OVERHEAD - size;
+        if( padding > 0 )
+        {
+            memset( dst, 0, padding );
+            size += padding;
+        }
+        nal->i_padding = X264_MAX( padding, 0 );
+    }
 
     /* Write the size header for mp4/etc */
     if( !h->param.b_annexb )
     {
         /* Size doesn't include the size of the header we're writing now. */
-        orig_dst[0] = size>>24;
-        orig_dst[1] = size>>16;
-        orig_dst[2] = size>> 8;
-        orig_dst[3] = size>> 0;
+        int chunk_size = size - 4;
+        orig_dst[0] = chunk_size >> 24;
+        orig_dst[1] = chunk_size >> 16;
+        orig_dst[2] = chunk_size >> 8;
+        orig_dst[3] = chunk_size >> 0;
     }
 
-    return size+4;
+    nal->i_payload = size;
+    nal->p_payload = orig_dst;
+    x264_emms();
 }
 
 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
 {
+    memset( pf, 0, sizeof(*pf) );
+
     pf->nal_escape = x264_nal_escape_c;
-#ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT )
-        pf->nal_escape = x264_nal_escape_mmxext;
-    if( (cpu&X264_CPU_SSE2) && (cpu&X264_CPU_SSE2_IS_FAST) )
-        pf->nal_escape = x264_nal_escape_sse2;
+#if HAVE_MMX
+#if ARCH_X86_64
+    pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2;
+    pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2;
+    pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2;
+#endif
+
+    if( cpu&X264_CPU_MMX2 )
+        pf->nal_escape = x264_nal_escape_mmx2;
+    if( cpu&X264_CPU_SSE2 )
+    {
+#if ARCH_X86_64
+        if( cpu&X264_CPU_LZCNT )
+        {
+            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_sse2_lzcnt;
+            pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_sse2_lzcnt;
+            pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_sse2_lzcnt;
+        }
+#endif
+        if( cpu&X264_CPU_SSE2_IS_FAST )
+            pf->nal_escape = x264_nal_escape_sse2;
+    }
+#if ARCH_X86_64
+    if( cpu&X264_CPU_SSSE3 )
+    {
+        pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3;
+        pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_ssse3;
+        if( cpu&X264_CPU_LZCNT )
+        {
+            pf->cabac_block_residual_rd_internal = x264_cabac_block_residual_rd_internal_ssse3_lzcnt;
+            pf->cabac_block_residual_8x8_rd_internal = x264_cabac_block_residual_8x8_rd_internal_ssse3_lzcnt;
+        }
+    }
+
+    if( cpu&X264_CPU_AVX2 )
+    {
+        pf->nal_escape = x264_nal_escape_avx2;
+        if( cpu&X264_CPU_BMI2 )
+            pf->cabac_block_residual_internal = x264_cabac_block_residual_internal_avx2_bmi2;
+    }
+#endif
+#endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
+#if ARCH_AARCH64
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
 #endif
 }