git.sesse.net Git - x264/blob - common/bitstream.h

   1 /*****************************************************************************
   2  * bitstream.h: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003-2008 x264 project
   5  *
   6  * Authors: Loren Merritt <lorenm@u.washington.edu>
   7  *          Fiona Glaser <fiona@x264.com>
   8  *          Laurent Aimar <fenrir@via.ecp.fr>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #ifndef X264_BS_H
  26 #define X264_BS_H
  27
   /* One entry of a small VLC table.
    * NOTE(review): field meanings are inferred from their names -- i_bits
    * presumably holds the codeword and i_size its length in bits; confirm
    * against the table definitions. */
   typedef struct
   {
       uint8_t i_bits;
       uint8_t i_size;
   } vlc_t;
  33
   /* VLC table entry with a wider code field than vlc_t plus a link to the
    * table to use for the following symbol (element type of x264_level_token).
    * NOTE(review): i_bits/i_size are presumably the codeword and its length
    * in bits -- confirm against the code that fills the table. */
   typedef struct
   {
       uint16_t i_bits;
       uint8_t  i_size;
       /* Next level table to use */
       uint8_t  i_next;
   } vlc_large_t;
  41
  42 typedef struct bs_s
  43 {
  44     uint8_t *p_start;
  45     uint8_t *p;
  46     uint8_t *p_end;
  47
  48     intptr_t cur_bits;
  49     int     i_left;    /* i_count number of available bits */
  50     int     i_bits_encoded; /* RD only */
  51 } bs_t;
  52
   /* Run/level description of a coefficient block, with room for 16 entries.
    * NOTE(review): presumably `last` is the index of the last nonzero
    * coefficient and level[]/run[] hold the nonzero values and the zero runs
    * between them -- the code that fills this is not visible here; confirm. */
   typedef struct
   {
       int     last;
       int16_t level[16];
       uint8_t run[16];
   } x264_run_level_t;
  59
   /* Constant VLC tables, defined in another translation unit.
    * NOTE(review): judging by the names these are the CAVLC tables for
    * coeff_token, total_zeros (regular and DC) and run_before; the meaning of
    * each array dimension is not visible from this header -- confirm at the
    * definitions. */
   extern const vlc_t x264_coeff0_token[5];
   extern const vlc_t x264_coeff_token[5][16][4];
   extern const vlc_t x264_total_zeros[15][16];
   extern const vlc_t x264_total_zeros_dc[3][4];
   extern const vlc_t x264_run_before[7][16];
  65
   /* Table of bitstream routines selected at run time (filled in by
    * x264_bitstream_init).
    * NOTE(review): nal_escape presumably copies [src,end) to dst while
    * inserting NAL escape bytes and returns the end of the written data --
    * confirm against the implementation. */
   typedef struct
   {
       uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
   } x264_bitstream_function_t;
  70
   /* Implemented outside this header.
    * NOTE(review): x264_nal_encode presumably serialises `nal` into `dst`
    * (b_long_startcode selecting the start-code length) and returns a size;
    * x264_bitstream_init presumably fills `pf` according to the `cpu`
    * capability flags. Neither definition is visible here -- confirm. */
   int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode );
   void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
  73
   /* Number of entries in each of the 7 precomputed level-token tables below.
    *
    * A larger level table size theoretically could help a bit at extremely
    * high bitrates, but the cost in cache is usually too high for it to be
    * useful.
    * This size appears to be optimal for QP18 encoding on a Nehalem CPU.
    * FIXME: Do further testing? */
   #define LEVEL_TABLE_SIZE 128
   /* Not const: the table is defined (and presumably filled at start-up --
    * TODO confirm) in another translation unit. */
   extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
  81
  82 static inline void bs_init( bs_t *s, void *p_data, int i_data )
  83 {
  84     int offset = ((intptr_t)p_data & 3);
  85     s->p       = s->p_start = (uint8_t*)p_data - offset;
  86     s->p_end   = (uint8_t*)p_data + i_data;
  87     s->i_left  = (WORD_SIZE - offset)*8;
  88     s->cur_bits = endian_fix32( M32(s->p) );
  89     s->cur_bits >>= (4-offset)*8;
  90 }
  91 static inline int bs_pos( bs_t *s )
  92 {
  93     return( 8 * (s->p - s->p_start) + (WORD_SIZE*8) - s->i_left );
  94 }
  95
  96 /* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
  97 static inline void bs_flush( bs_t *s )
  98 {
  99     M32( s->p ) = endian_fix32( s->cur_bits << (s->i_left&31) );
 100     s->p += WORD_SIZE - s->i_left / 8;
 101     s->i_left = WORD_SIZE*8;
 102 }
 103 /* The inverse of bs_flush: prepare the bitstream to be written to again. */
 104 static inline void bs_realign( bs_t *s )
 105 {
 106     int offset = ((intptr_t)s->p & 3);
 107     if( offset )
 108     {
 109         s->p       = (uint8_t*)s->p - offset;
 110         s->i_left  = (WORD_SIZE - offset)*8;
 111         s->cur_bits = endian_fix32( M32(s->p) );
 112         s->cur_bits >>= (4-offset)*8;
 113     }
 114 }
 115
 116 static inline void bs_write( bs_t *s, int i_count, uint32_t i_bits )
 117 {
 118     if( WORD_SIZE == 8 )
 119     {
 120         s->cur_bits = (s->cur_bits << i_count) | i_bits;
 121         s->i_left -= i_count;
 122         if( s->i_left <= 32 )
 123         {
 124 #ifdef WORDS_BIGENDIAN
 125             M32( s->p ) = s->cur_bits >> (32 - s->i_left);
 126 #else
 127             M32( s->p ) = endian_fix( s->cur_bits << s->i_left );
 128 #endif
 129             s->i_left += 32;
 130             s->p += 4;
 131         }
 132     }
 133     else
 134     {
 135         if( i_count < s->i_left )
 136         {
 137             s->cur_bits = (s->cur_bits << i_count) | i_bits;
 138             s->i_left -= i_count;
 139         }
 140         else
 141         {
 142             i_count -= s->i_left;
 143             s->cur_bits = (s->cur_bits << s->i_left) | (i_bits >> i_count);
 144             M32( s->p ) = endian_fix( s->cur_bits );
 145             s->p += 4;
 146             s->cur_bits = i_bits;
 147             s->i_left = 32 - i_count;
 148         }
 149     }
 150 }
 151
 152 /* Special case to eliminate branch in normal bs_write. */
 153 /* Golomb never writes an even-size code, so this is only used in slice headers. */
 154 static inline void bs_write32( bs_t *s, uint32_t i_bits )
 155 {
 156     bs_write( s, 16, i_bits >> 16 );
 157     bs_write( s, 16, i_bits );
 158 }
 159
 160 static inline void bs_write1( bs_t *s, uint32_t i_bit )
 161 {
 162     s->cur_bits <<= 1;
 163     s->cur_bits |= i_bit;
 164     s->i_left--;
 165     if( s->i_left == WORD_SIZE*8-32 )
 166     {
 167         M32( s->p ) = endian_fix32( s->cur_bits );
 168         s->p += 4;
 169         s->i_left = WORD_SIZE*8;
 170     }
 171 }
 172
 173 static inline void bs_align_0( bs_t *s )
 174 {
 175     bs_write( s, s->i_left&7, 0 );
 176     bs_flush( s );
 177 }
 178 static inline void bs_align_1( bs_t *s )
 179 {
 180     bs_write( s, s->i_left&7, (1 << (s->i_left&7)) - 1 );
 181     bs_flush( s );
 182 }
 183 static inline void bs_align_10( bs_t *s )
 184 {
 185     if( s->i_left&7 )
 186         bs_write( s, s->i_left&7, 1 << ( (s->i_left&7) - 1 ) );
 187 }
 188
 189 /* golomb functions */
 190
 191 static const uint8_t x264_ue_size_tab[256] =
 192 {
 193      1, 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7,
 194      9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
 195     11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
 196     11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
 197     13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
 198     13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
 199     13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
 200     13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
 201     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 202     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 203     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 204     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 205     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 206     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 207     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 208     15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 209 };
 210
 211 static inline void bs_write_ue_big( bs_t *s, unsigned int val )
 212 {
 213     int size = 0;
 214     int tmp = ++val;
 215     if( tmp >= 0x10000 )
 216     {
 217         size = 32;
 218         tmp >>= 16;
 219     }
 220     if( tmp >= 0x100 )
 221     {
 222         size += 16;
 223         tmp >>= 8;
 224     }
 225     size += x264_ue_size_tab[tmp];
 226     bs_write( s, size>>1, 0 );
 227     bs_write( s, (size>>1)+1, val );
 228 }
 229
 230 /* Only works on values under 255. */
 231 static inline void bs_write_ue( bs_t *s, int val )
 232 {
 233     bs_write( s, x264_ue_size_tab[val+1], val+1 );
 234 }
 235
 236 static inline void bs_write_se( bs_t *s, int val )
 237 {
 238     int size = 0;
 239     /* Faster than (val <= 0 ? -val*2+1 : val*2) */
 240     /* 4 instructions on x86, 3 on ARM */
 241     int tmp = 1 - val*2;
 242     if( tmp < 0 ) tmp = val*2;
 243     val = tmp;
 244
 245     if( tmp >= 0x100 )
 246     {
 247         size = 16;
 248         tmp >>= 8;
 249     }
 250     size += x264_ue_size_tab[tmp];
 251     bs_write( s, size, val );
 252 }
 253
 254 static inline void bs_write_te( bs_t *s, int x, int val )
 255 {
 256     if( x == 1 )
 257         bs_write1( s, 1^val );
 258     else //if( x > 1 )
 259         bs_write_ue( s, val );
 260 }
 261
 262 static inline void bs_rbsp_trailing( bs_t *s )
 263 {
 264     bs_write1( s, 1 );
 265     bs_write( s, s->i_left&7, 0  );
 266 }
 267
 268 static ALWAYS_INLINE int bs_size_ue( unsigned int val )
 269 {
 270     return x264_ue_size_tab[val+1];
 271 }
 272
 273 static ALWAYS_INLINE int bs_size_ue_big( unsigned int val )
 274 {
 275     if( val < 255 )
 276         return x264_ue_size_tab[val+1];
 277     else
 278         return x264_ue_size_tab[(val+1)>>8] + 16;
 279 }
 280
 281 static ALWAYS_INLINE int bs_size_se( int val )
 282 {
 283     int tmp = 1 - val*2;
 284     if( tmp < 0 ) tmp = val*2;
 285     if( tmp < 256 )
 286         return x264_ue_size_tab[tmp];
 287     else
 288         return x264_ue_size_tab[tmp>>8]+16;
 289 }
 290
 291 static ALWAYS_INLINE int bs_size_te( int x, int val )
 292 {
 293     if( x == 1 )
 294         return 1;
 295     else //if( x > 1 )
 296         return x264_ue_size_tab[val+1];
 297 }
 298
 299 #endif