1 /*****************************************************************************
2 * bitstream.h: bitstream writing
3 *****************************************************************************
4 * Copyright (C) 2003-2015 x264 project
6 * Authors: Loren Merritt <lorenm@u.washington.edu>
7 * Fiona Glaser <fiona@x264.com>
8 * Laurent Aimar <fenrir@via.ecp.fr>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
24 * This program is also available under a commercial proprietary license.
25 * For more information, contact us at licensing@x264.com.
26 *****************************************************************************/
41 /* Next level table to use */
52 int i_left; /* number of bits still free in cur_bits; bs_write() subtracts i_count from it */
53 int i_bits_encoded; /* RD only */
60 ALIGNED_16( dctcoef level[18] );
/* Read-only vlc_t lookup tables; only declared here, defined in another
 * translation unit.
 * NOTE(review): the meaning of each array index is not documented in this
 * header -- presumably they mirror the H.264 CAVLC tables of the same names;
 * confirm against the definitions. */
63 extern const vlc_t x264_coeff0_token[6];
64 extern const vlc_t x264_coeff_token[6][16][4];
65 extern const vlc_t x264_total_zeros[15][16];
66 extern const vlc_t x264_total_zeros_2x2_dc[3][4];
67 extern const vlc_t x264_total_zeros_2x4_dc[7][8];
/* nal_escape: takes a destination pointer and a source range given by src
 * and end, and returns a uint8_t pointer -- presumably the end of the data
 * written to dst; TODO confirm against the implementations. */
71 uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
/* The three residual coders below share one signature
 * (dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb).
 * NOTE(review): the _rd variants are presumably the rate-distortion
 * versions -- confirm. */
72 void (*cabac_block_residual_internal)( dctcoef *l, int b_interlaced,
73 intptr_t ctx_block_cat, x264_cabac_t *cb );
74 void (*cabac_block_residual_rd_internal)( dctcoef *l, int b_interlaced,
75 intptr_t ctx_block_cat, x264_cabac_t *cb );
76 void (*cabac_block_residual_8x8_rd_internal)( dctcoef *l, int b_interlaced,
77 intptr_t ctx_block_cat, x264_cabac_t *cb );
78 } x264_bitstream_function_t;
/* Takes CPU information in cpu and a function table in pf; presumably fills
 * *pf with the implementations suited to that CPU -- TODO confirm. */
80 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
82 /* A larger level table size theoretically could help a bit at extremely
83 * high bitrates, but the cost in cache is usually too high for it to be
84 * worth it.
85 * This size appears to be optimal for QP18 encoding on a Nehalem CPU.
86 * FIXME: Do further testing? */
87 #define LEVEL_TABLE_SIZE 128
/* Level-token table: 7 rows of LEVEL_TABLE_SIZE entries each; declared here,
 * defined elsewhere. */
88 extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
90 /* The longest possible set of zero run codes sums to 25 bits. This leaves
91 * plenty of room for both the code (25 bits) and size (5 bits) in a uint32_t. */
/* One packed uint32_t per entry, 1<<16 entries; declared here, defined
 * elsewhere.
 * NOTE(review): the bit positions of code and size inside an entry, and how
 * the index is formed, are not shown in this header -- confirm. */
93 extern uint32_t x264_run_before[1<<16];
/* Attach the writer s to the buffer p_data of i_data bytes.
 * s->p and s->p_start are rounded down to a 4-byte boundary; s->p_end is the
 * byte just past the buffer. */
95 static inline void bs_init( bs_t *s, void *p_data, int i_data )
/* Number of bytes p_data lies past the previous 4-byte boundary. */
97 int offset = ((intptr_t)p_data & 3);
98 s->p = s->p_start = (uint8_t*)p_data - offset;
99 s->p_end = (uint8_t*)p_data + i_data;
/* The bytes in front of p_data count as already used in the first word. */
100 s->i_left = (WORD_SIZE - offset)*8;
/* Load the aligned 32-bit word and shift it right so that only its leading
 * `offset` bytes remain (assuming endian_fix32 yields stream byte order),
 * presumably so they are stored back unchanged by a later write.
 * NOTE(review): with offset == 0 the shift count is 32, which is only
 * well-defined if cur_bits is wider than 32 bits -- confirm its type. */
101 s->cur_bits = endian_fix32( M32(s->p) );
102 s->cur_bits >>= (4-offset)*8;
/* Return the write position in bits relative to s->p_start: 8 bits for each
 * byte between p_start and p, plus the bits already held in the current word
 * (WORD_SIZE*8 minus the free bits in i_left). */
104 static inline int bs_pos( bs_t *s )
106 return( 8 * (s->p - s->p_start) + (WORD_SIZE*8) - s->i_left );
109 /* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
110 static inline void bs_flush( bs_t *s )
/* Shift the pending bits left by i_left&31 and store them as one 32-bit
 * word; the mask keeps the shift count in the range 0..31. */
112 M32( s->p ) = endian_fix32( s->cur_bits << (s->i_left&31) );
/* Advance by the number of bytes that hold pending bits; because i_left>>3
 * rounds down, a partly filled byte counts as a whole byte. */
113 s->p += WORD_SIZE - (s->i_left >> 3);
/* The whole word is free again. */
114 s->i_left = WORD_SIZE*8;
116 /* The inverse of bs_flush: prepare the bitstream to be written to again. */
/* Applies the same alignment steps as bs_init, but to the current write
 * pointer s->p instead of a new buffer; p_start and p_end are not touched. */
117 static inline void bs_realign( bs_t *s )
/* Number of bytes s->p lies past the previous 4-byte boundary. */
119 int offset = ((intptr_t)s->p & 3);
/* Step back to that boundary and reload the bytes already written there into
 * cur_bits, with i_left reduced accordingly.
 * NOTE(review): if these lines run with offset == 0 the shift count below is
 * 32; whether a guard on offset precedes them is not visible in this view --
 * confirm. */
122 s->p = (uint8_t*)s->p - offset;
123 s->i_left = (WORD_SIZE - offset)*8;
124 s->cur_bits = endian_fix32( M32(s->p) );
125 s->cur_bits >>= (4-offset)*8;
/* Append i_count bits taken from i_bits to the stream.
 * In the lines visible here i_bits is ORed into cur_bits without masking, so
 * callers are presumably expected to pass a value that fits in i_count bits.
 * NOTE(review): two alternative bodies follow; the condition that selects
 * between them is not visible in this view -- presumably the word size;
 * confirm. */
129 static inline void bs_write( bs_t *s, int i_count, uint32_t i_bits )
/* First variant: shift the new bits into cur_bits and, once 32 or fewer bits
 * remain free, store a 32-bit word.
 * NOTE(review): two different store statements appear below; what selects
 * between them is not visible -- presumably host endianness; confirm. */
133 s->cur_bits = (s->cur_bits << i_count) | i_bits;
134 s->i_left -= i_count;
135 if( s->i_left <= 32 )
138 M32( s->p ) = s->cur_bits >> (32 - s->i_left);
140 M32( s->p ) = endian_fix( s->cur_bits << s->i_left );
/* Second variant: when the new bits leave room to spare, just shift them in. */
148 if( i_count < s->i_left )
150 s->cur_bits = (s->cur_bits << i_count) | i_bits;
151 s->i_left -= i_count;
/* Presumably the other branch: fill the remaining i_left bits of cur_bits
 * with the high part of i_bits, store the word, then restart cur_bits from
 * i_bits. After the subtraction i_count is the number of bits that did not
 * fit into the stored word. */
155 i_count -= s->i_left;
156 s->cur_bits = (s->cur_bits << s->i_left) | (i_bits >> i_count);
157 M32( s->p ) = endian_fix( s->cur_bits );
159 s->cur_bits = i_bits;
160 s->i_left = 32 - i_count;
165 /* Special case to eliminate branch in normal bs_write. */
166 /* Golomb never writes an even-size code, so this is only used in slice headers. */
/* Writes a full 32-bit value as two 16-bit bs_write() calls, upper half
 * first. The second call passes i_bits without masking off its upper half. */
167 static inline void bs_write32( bs_t *s, uint32_t i_bits )
169 bs_write( s, 16, i_bits >> 16 );
170 bs_write( s, 16, i_bits );
/* Append a single bit.
 * i_bit is ORed into cur_bits as-is, so it is presumably expected to be 0 or
 * 1. NOTE(review): the statements that shift cur_bits, decrement i_left and
 * advance s->p are not visible in this view -- confirm. */
173 static inline void bs_write1( bs_t *s, uint32_t i_bit )
176 s->cur_bits |= i_bit;
/* When exactly 32 bits have accumulated, store them as one 32-bit word and
 * mark the whole word as free again. */
178 if( s->i_left == WORD_SIZE*8-32 )
180 M32( s->p ) = endian_fix32( s->cur_bits );
182 s->i_left = WORD_SIZE*8;
/* Pad with zero bits up to the next byte boundary: i_left&7 is the number of
 * bits still free in the current byte (0 when already aligned). */
186 static inline void bs_align_0( bs_t *s )
188 bs_write( s, s->i_left&7, 0 );
/* Pad with one bits up to the next byte boundary: (1 << n) - 1 is a run of
 * n ones, where n = i_left&7 is the number of bits free in the current byte. */
191 static inline void bs_align_1( bs_t *s )
193 bs_write( s, s->i_left&7, (1 << (s->i_left&7)) - 1 );
/* Pad up to the next byte boundary with a single one bit followed by zeros:
 * 1 << (n-1) written in n bits, where n = i_left&7.
 * NOTE(review): when n is 0 the shift count is -1, which is undefined in C;
 * a guard on n is not visible in this view -- confirm one exists. */
196 static inline void bs_align_10( bs_t *s )
199 bs_write( s, s->i_left&7, 1 << ( (s->i_left&7) - 1 ) );
202 /* golomb functions */
/* Code length lookup: entry v (v >= 1) is 2*floor(log2(v)) + 1, the number of
 * bits needed to write v with floor(log2(v)) leading zero bits in front of
 * it; entry 0 is 1. The writers and size helpers below index it with val+1,
 * which is why a direct lookup only covers values whose val+1 is at most 255. */
204 static const uint8_t x264_ue_size_tab[256] =
206 1, 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7,
207 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
208 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
209 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
210 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
211 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
212 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
213 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
214 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
215 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
216 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
217 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
218 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
219 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
220 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
221 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
/* Unsigned Golomb writer for values outside the range of the direct table
 * lookup in bs_write_ue().
 * NOTE(review): the statements that declare size and tmp and prepare val are
 * not visible in this view; only the final accumulation and the two writes
 * are -- confirm the rest against the full file. */
224 static inline void bs_write_ue_big( bs_t *s, unsigned int val )
238 size += x264_ue_size_tab[tmp];
/* The code is emitted in two parts: size>>1 zero bits, then (size>>1)+1 bits
 * of val. */
239 bs_write( s, size>>1, 0 );
240 bs_write( s, (size>>1)+1, val );
243 /* Only works on values under 255. */
/* Writes val+1 in x264_ue_size_tab[val+1] bits with one bs_write() call; the
 * leading zero bits of that fixed-width field form the code prefix. The limit
 * above comes from the table having 256 entries and being indexed by val+1. */
244 static inline void bs_write_ue( bs_t *s, int val )
246 bs_write( s, x264_ue_size_tab[val+1], val+1 );
/* Signed Golomb writer. Per the comment below, val is mapped to
 * -val*2+1 when val <= 0 and to val*2 otherwise before being written.
 * NOTE(review): the declarations of size and tmp, the first half of the
 * mapping and any handling of large values are not visible in this view --
 * confirm against the full file. */
249 static inline void bs_write_se( bs_t *s, int val )
252 /* Faster than (val <= 0 ? -val*2+1 : val*2) */
253 /* 4 instructions on x86, 3 on ARM */
/* A negative tmp selects the val*2 form of the mapping. */
255 if( tmp < 0 ) tmp = val*2;
/* Add the table length for tmp, then emit val in that many bits. */
263 size += x264_ue_size_tab[tmp];
264 bs_write( s, size, val );
/* Write val in one of two forms: a single bit holding val inverted (1^val),
 * or the unsigned Golomb form via bs_write_ue().
 * NOTE(review): the test on x that chooses between the two is not visible in
 * this view -- confirm. */
267 static inline void bs_write_te( bs_t *s, int x, int val )
270 bs_write1( s, 1^val );
272 bs_write_ue( s, val );
/* The visible statement pads the stream with zero bits up to the next byte
 * boundary (i_left&7 bits).
 * NOTE(review): H.264 RBSP trailing bits begin with a single 1 stop bit; the
 * write of that bit is not visible in this view -- confirm it precedes the
 * padding. */
275 static inline void bs_rbsp_trailing( bs_t *s )
278 bs_write( s, s->i_left&7, 0 );
/* Return the number of bits bs_write_ue() emits for val: the table entry for
 * val+1. The 256-entry table limits this to val+1 <= 255. */
281 static ALWAYS_INLINE int bs_size_ue( unsigned int val )
283 return x264_ue_size_tab[val+1];
/* Bit length of the unsigned Golomb code for val, for a wider range than
 * bs_size_ue().
 * NOTE(review): the condition choosing between the two returns is not visible
 * in this view -- presumably a range check on val; confirm. */
286 static ALWAYS_INLINE int bs_size_ue_big( unsigned int val )
/* Direct lookup when val+1 fits the table. */
289 return x264_ue_size_tab[val+1];
/* Otherwise look up val+1 with its low 8 bits dropped and add 16: dropping
 * 8 bits lowers floor(log2) by 8, and the length is 2*floor(log2)+1. The
 * shifted index must itself stay below 256. */
291 return x264_ue_size_tab[(val+1)>>8] + 16;
/* Bit length of the signed Golomb code for val.
 * NOTE(review): the declaration and initial value of tmp and the condition
 * choosing between the two returns are not visible in this view -- confirm. */
294 static ALWAYS_INLINE int bs_size_se( int val )
/* A negative tmp is replaced by val*2. */
297 if( tmp < 0 ) tmp = val*2;
/* Direct lookup when tmp fits the 256-entry table. */
299 return x264_ue_size_tab[tmp];
/* Otherwise look up tmp with its low 8 bits dropped and add 16 bits. */
301 return x264_ue_size_tab[tmp>>8]+16;
/* Presumably the bit-length counterpart of bs_write_te().
 * NOTE(review): only one return is visible in this view -- the same table
 * lookup as bs_size_ue(); how x is used is not visible, confirm against the
 * full file. */
304 static ALWAYS_INLINE int bs_size_te( int x, int val )
309 return x264_ue_size_tab[val+1];