git.sesse.net Git - x264/blob - common/frame.c

   1 /*****************************************************************************
   2  * frame.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include "common.h"
  25
  26 #define PADH 32
  27 #define PADV 32
  28
  29 x264_frame_t *x264_frame_new( x264_t *h )
  30 {
  31     x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
  32     int i, j;
  33
  34     int i_mb_count = h->mb.i_mb_count;
  35     int i_stride;
  36     int i_lines;
  37     int i_padv = PADV << h->param.b_interlaced;
  38
  39     if( !frame ) return NULL;
  40
  41     memset( frame, 0, sizeof(x264_frame_t) );
  42
  43     /* allocate frame data (+64 for extra data for me) */
  44     i_stride = ( ( h->param.i_width  + 15 ) & -16 )+ 2*PADH;
  45     i_lines  = ( ( h->param.i_height + 15 ) & -16 );
  46     if( h->param.b_interlaced )
  47         i_lines = ( i_lines + 31 ) & -32;
  48
  49     frame->i_plane = 3;
  50     for( i = 0; i < 3; i++ )
  51     {
  52         int i_divh = 1;
  53         int i_divw = 1;
  54         if( i > 0 )
  55         {
  56             if( h->param.i_csp == X264_CSP_I420 )
  57                 i_divh = i_divw = 2;
  58             else if( h->param.i_csp == X264_CSP_I422 )
  59                 i_divw = 2;
  60         }
  61         frame->i_stride[i] = i_stride / i_divw;
  62         frame->i_lines[i] = i_lines / i_divh;
  63         CHECKED_MALLOC( frame->buffer[i],
  64                         frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
  65
  66         frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
  67                           frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
  68     }
  69     frame->i_stride[3] = 0;
  70     frame->i_lines[3] = 0;
  71     frame->buffer[3] = NULL;
  72     frame->plane[3] = NULL;
  73
  74     frame->filtered[0] = frame->plane[0];
  75     for( i = 0; i < 3; i++ )
  76     {
  77         CHECKED_MALLOC( frame->buffer[4+i],
  78                         frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
  79         frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
  80                                 frame->i_stride[0] * i_padv + PADH;
  81     }
  82
  83     if( h->frames.b_have_lowres )
  84     {
  85         frame->i_stride_lowres = frame->i_stride[0]/2 + PADH;
  86         frame->i_lines_lowres = frame->i_lines[0]/2;
  87         for( i = 0; i < 4; i++ )
  88         {
  89             CHECKED_MALLOC( frame->buffer_lowres[i],
  90                             frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
  91             frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
  92                                 frame->i_stride_lowres * i_padv + PADH;
  93         }
  94     }
  95
  96     if( h->param.analyse.i_me_method == X264_ME_ESA )
  97     {
  98         CHECKED_MALLOC( frame->buffer[7],
  99                         2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
 100         frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
 101     }
 102
 103     frame->i_poc = -1;
 104     frame->i_type = X264_TYPE_AUTO;
 105     frame->i_qpplus1 = 0;
 106     frame->i_pts = -1;
 107     frame->i_frame = -1;
 108     frame->i_frame_num = -1;
 109     frame->i_lines_completed = -1;
 110
 111     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
 112     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
 113     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
 114     if( h->param.i_bframe )
 115     {
 116         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
 117         CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
 118     }
 119     else
 120     {
 121         frame->mv[1]  = NULL;
 122         frame->ref[1] = NULL;
 123     }
 124
 125     CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
 126     CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
 127     for( i = 0; i < h->param.i_bframe + 2; i++ )
 128         for( j = 0; j < h->param.i_bframe + 2; j++ )
 129             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 130
 131     pthread_mutex_init( &frame->mutex, NULL );
 132     pthread_cond_init( &frame->cv, NULL );
 133
 134     return frame;
 135
 136 fail:
 137     x264_frame_delete( frame );
 138     return NULL;
 139 }
 140
 141 void x264_frame_delete( x264_frame_t *frame )
 142 {
 143     int i, j;
 144     for( i = 0; i < 8; i++ )
 145         x264_free( frame->buffer[i] );
 146     for( i = 0; i < 4; i++ )
 147         x264_free( frame->buffer_lowres[i] );
 148     for( i = 0; i < X264_BFRAME_MAX+2; i++ )
 149         for( j = 0; j < X264_BFRAME_MAX+2; j++ )
 150             x264_free( frame->i_row_satds[i][j] );
 151     x264_free( frame->i_row_bits );
 152     x264_free( frame->i_row_qp );
 153     x264_free( frame->mb_type );
 154     x264_free( frame->mv[0] );
 155     x264_free( frame->mv[1] );
 156     x264_free( frame->ref[0] );
 157     x264_free( frame->ref[1] );
 158     pthread_mutex_destroy( &frame->mutex );
 159     pthread_cond_destroy( &frame->cv );
 160     x264_free( frame );
 161 }
 162
 163 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 164 {
 165     int i_csp = src->img.i_csp & X264_CSP_MASK;
 166     dst->i_type     = src->i_type;
 167     dst->i_qpplus1  = src->i_qpplus1;
 168     dst->i_pts      = src->i_pts;
 169
 170     if( i_csp <= X264_CSP_NONE  || i_csp >= X264_CSP_MAX )
 171         x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
 172     else
 173         h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
 174 }
 175
 176
 177
 178 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
 179 {
 180 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
 181     int y;
 182     for( y = 0; y < i_height; y++ )
 183     {
 184         /* left band */
 185         memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
 186         /* right band */
 187         memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
 188     }
 189     /* upper band */
 190     if( b_pad_top )
 191     for( y = 0; y < i_padv; y++ )
 192         memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
 193     /* lower band */
 194     if( b_pad_bottom )
 195     for( y = 0; y < i_padv; y++ )
 196         memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
 197 #undef PPIXEL
 198 }
 199
 200 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 201 {
 202     int i;
 203     int b_start = !mb_y;
 204     if( mb_y & h->sh.b_mbaff )
 205         return;
 206     for( i = 0; i < frame->i_plane; i++ )
 207     {
 208         int stride = frame->i_stride[i];
 209         int width = 16*h->sps->i_mb_width >> !!i;
 210         int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
 211         int padh = PADH >> !!i;
 212         int padv = PADV >> !!i;
 213         // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
 214         uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
 215         if( b_end && !b_start )
 216             height += 4 >> (!!i + h->sh.b_mbaff);
 217         if( h->sh.b_mbaff )
 218         {
 219             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 220             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 221         }
 222         else
 223         {
 224             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 225         }
 226     }
 227 }
 228
 229 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 230 {
 231     /* during filtering, 8 extra pixels were filtered on each edge.
 232        we want to expand border from the last filtered pixel */
 233     int b_start = !mb_y;
 234     int stride = frame->i_stride[0];
 235     int width = 16*h->sps->i_mb_width + 16;
 236     int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
 237     int padh = PADH - 8;
 238     int padv = PADV - 8;
 239     int i;
 240     for( i = 1; i < 4; i++ )
 241     {
 242         // buffer: 8 luma, to match the hpel filter
 243         uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
 244         if( h->sh.b_mbaff )
 245         {
 246             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 247             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 248         }
 249         else
 250         {
 251             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 252         }
 253     }
 254 }
 255
 256 void x264_frame_expand_border_lowres( x264_frame_t *frame )
 257 {
 258     int i;
 259     for( i = 0; i < 4; i++ )
 260         plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
 261 }
 262
 263 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
 264 {
 265     int i, y;
 266     for( i = 0; i < frame->i_plane; i++ )
 267     {
 268         int i_subsample = i ? 1 : 0;
 269         int i_width = h->param.i_width >> i_subsample;
 270         int i_height = h->param.i_height >> i_subsample;
 271         int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
 272         int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
 273
 274         if( i_padx )
 275         {
 276             for( y = 0; y < i_height; y++ )
 277                 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
 278                          frame->plane[i][y*frame->i_stride[i] + i_width - 1],
 279                          i_padx );
 280         }
 281         if( i_pady )
 282         {
 283             //FIXME interlace? or just let it pad using the wrong field
 284             for( y = i_height; y < i_height + i_pady; y++ )
 285                 memcpy( &frame->plane[i][y*frame->i_stride[i]],
 286                         &frame->plane[i][(i_height-1)*frame->i_stride[i]],
 287                         i_width + i_padx );
 288         }
 289     }
 290 }
 291
 292
 293 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 294  * entropy coding, but per 64 coeffs for the purpose of deblocking */
 295 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 296 {
 297     uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 298     int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
 299     int x;
 300     for( x=0; x<h->sps->i_mb_width; x++ )
 301     {
 302         memcpy( buf+x, src+x, 16 );
 303         if( transform[x] )
 304         {
 305             if( src[x][0] ) src[x][0] = 0x01010101;
 306             if( src[x][1] ) src[x][1] = 0x01010101;
 307             if( src[x][2] ) src[x][2] = 0x01010101;
 308             if( src[x][3] ) src[x][3] = 0x01010101;
 309         }
 310     }
 311 }
 312
 313 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 314 {
 315     uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 316     int x;
 317     for( x=0; x<h->sps->i_mb_width; x++ )
 318         memcpy( dst+x, buf+x, 16 );
 319 }
 320
 321 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
 322 {
 323     func( h, mb_y, buf );
 324     if( mb_y > 0 )
 325         func( h, mb_y-1, buf + h->sps->i_mb_width );
 326     if( h->sh.b_mbaff )
 327     {
 328         func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
 329         if( mb_y > 0 )
 330             func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
 331     }
 332 }
 333
 334
 335 /* Deblocking filter */
 336
 337 static const int i_alpha_table[52] =
 338 {
 339      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 340      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
 341      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
 342     25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
 343     80, 90,101,113,127,144,162,182,203,226,
 344     255, 255
 345 };
 346 static const int i_beta_table[52] =
 347 {
 348      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 349      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
 350      3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
 351      8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
 352     13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
 353     18, 18
 354 };
 355 static const int i_tc0_table[52][3] =
 356 {
 357     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 358     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 359     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
 360     { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
 361     { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
 362     { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
 363     { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
 364     { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
 365     { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
 366 };
 367
 368 /* From ffmpeg */
 369 static inline int clip_uint8( int a )
 370 {
 371     if (a&(~255))
 372         return (-a)>>31;
 373     else
 374         return a;
 375 }
 376
 377 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 378 {
 379     int i, d;
 380     for( i = 0; i < 4; i++ ) {
 381         if( tc0[i] < 0 ) {
 382             pix += 4*ystride;
 383             continue;
 384         }
 385         for( d = 0; d < 4; d++ ) {
 386             const int p2 = pix[-3*xstride];
 387             const int p1 = pix[-2*xstride];
 388             const int p0 = pix[-1*xstride];
 389             const int q0 = pix[ 0*xstride];
 390             const int q1 = pix[ 1*xstride];
 391             const int q2 = pix[ 2*xstride];
 392
 393             if( abs( p0 - q0 ) < alpha &&
 394                 abs( p1 - p0 ) < beta &&
 395                 abs( q1 - q0 ) < beta ) {
 396
 397                 int tc = tc0[i];
 398                 int delta;
 399
 400                 if( abs( p2 - p0 ) < beta ) {
 401                     pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
 402                     tc++;
 403                 }
 404                 if( abs( q2 - q0 ) < beta ) {
 405                     pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
 406                     tc++;
 407                 }
 408
 409                 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 410                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 411                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 412             }
 413             pix += ystride;
 414         }
 415     }
 416 }
 417 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 418 {
 419     deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
 420 }
 421 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 422 {
 423     deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
 424 }
 425
 426 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 427 {
 428     int i, d;
 429     for( i = 0; i < 4; i++ ) {
 430         const int tc = tc0[i];
 431         if( tc <= 0 ) {
 432             pix += 2*ystride;
 433             continue;
 434         }
 435         for( d = 0; d < 2; d++ ) {
 436             const int p1 = pix[-2*xstride];
 437             const int p0 = pix[-1*xstride];
 438             const int q0 = pix[ 0*xstride];
 439             const int q1 = pix[ 1*xstride];
 440
 441             if( abs( p0 - q0 ) < alpha &&
 442                 abs( p1 - p0 ) < beta &&
 443                 abs( q1 - q0 ) < beta ) {
 444
 445                 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 446                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 447                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 448             }
 449             pix += ystride;
 450         }
 451     }
 452 }
 453 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 454 {
 455     deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
 456 }
 457 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 458 {
 459     deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
 460 }
 461
 462 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 463 {
 464     int d;
 465     for( d = 0; d < 16; d++ ) {
 466         const int p2 = pix[-3*xstride];
 467         const int p1 = pix[-2*xstride];
 468         const int p0 = pix[-1*xstride];
 469         const int q0 = pix[ 0*xstride];
 470         const int q1 = pix[ 1*xstride];
 471         const int q2 = pix[ 2*xstride];
 472
 473         if( abs( p0 - q0 ) < alpha &&
 474             abs( p1 - p0 ) < beta &&
 475             abs( q1 - q0 ) < beta ) {
 476
 477             if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
 478                 if( abs( p2 - p0 ) < beta)
 479                 {
 480                     const int p3 = pix[-4*xstride];
 481                     /* p0', p1', p2' */
 482                     pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
 483                     pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
 484                     pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
 485                 } else {
 486                     /* p0' */
 487                     pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 488                 }
 489                 if( abs( q2 - q0 ) < beta)
 490                 {
 491                     const int q3 = pix[3*xstride];
 492                     /* q0', q1', q2' */
 493                     pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
 494                     pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
 495                     pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
 496                 } else {
 497                     /* q0' */
 498                     pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 499                 }
 500             }else{
 501                 /* p0', q0' */
 502                 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 503                 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 504             }
 505         }
 506         pix += ystride;
 507     }
 508 }
 509 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 510 {
 511     deblock_luma_intra_c( pix, stride, 1, alpha, beta );
 512 }
 513 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 514 {
 515     deblock_luma_intra_c( pix, 1, stride, alpha, beta );
 516 }
 517
 518 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 519 {
 520     int d;
 521     for( d = 0; d < 8; d++ ) {
 522         const int p1 = pix[-2*xstride];
 523         const int p0 = pix[-1*xstride];
 524         const int q0 = pix[ 0*xstride];
 525         const int q1 = pix[ 1*xstride];
 526
 527         if( abs( p0 - q0 ) < alpha &&
 528             abs( p1 - p0 ) < beta &&
 529             abs( q1 - q0 ) < beta ) {
 530
 531             pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
 532             pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
 533         }
 534
 535         pix += ystride;
 536     }
 537 }
 538 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 539 {
 540     deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
 541 }
 542 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 543 {
 544     deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
 545 }
 546
 547 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
 548                                  x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
 549 {
 550     int i;
 551     const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
 552     const int alpha = i_alpha_table[index_a];
 553     const int beta  = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
 554
 555     if( bS[0] < 4 ) {
 556         int8_t tc[4];
 557         for(i=0; i<4; i++)
 558             tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
 559         pf_inter( pix, i_stride, alpha, beta, tc );
 560     } else {
 561         pf_intra( pix, i_stride, alpha, beta );
 562     }
 563 }
 564
 565 void x264_frame_deblock_row( x264_t *h, int mb_y )
 566 {
 567     const int s8x8 = 2 * h->mb.i_mb_stride;
 568     const int s4x4 = 4 * h->mb.i_mb_stride;
 569     const int b_interlaced = h->sh.b_mbaff;
 570     const int mvy_limit = 4 >> b_interlaced;
 571     int mb_x;
 572
 573     int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
 574                          h->fdec->i_stride[1] << b_interlaced,
 575                          h->fdec->i_stride[2] << b_interlaced };
 576
 577     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 578         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
 579
 580     for( mb_x = 0; mb_x < h->sps->i_mb_width; )
 581     {
 582         const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
 583         const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
 584         const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
 585         const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
 586         const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
 587         int i_edge, i_dir;
 588
 589         int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
 590                             8*mb_y*h->fdec->i_stride[1] +  8*mb_x,
 591                             8*mb_y*h->fdec->i_stride[2] +  8*mb_x };
 592         if( b_interlaced && (mb_y&1) )
 593         {
 594             i_pix_y[0] -= 15*h->fdec->i_stride[0];
 595             i_pix_y[1] -=  7*h->fdec->i_stride[1];
 596             i_pix_y[2] -=  7*h->fdec->i_stride[2];
 597         }
 598
 599         x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
 600
 601         /* i_dir == 0 -> vertical edge
 602          * i_dir == 1 -> horizontal edge */
 603         for( i_dir = 0; i_dir < 2; i_dir++ )
 604         {
 605             int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
 606             int i_qp, i_qpn;
 607
 608             for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
 609             {
 610                 int mbn_xy, mbn_8x8, mbn_4x4;
 611                 int bS[4];  /* filtering strength */
 612
 613                 if( b_8x8_transform && (i_edge&1) )
 614                     continue;
 615
 616                 mbn_xy  = i_edge > 0 ? mb_xy  : ( i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride );
 617                 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
 618                 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
 619
 620                 if( b_interlaced && i_edge == 0 && i_dir == 1 )
 621                 {
 622                     mbn_xy -= h->mb.i_mb_stride;
 623                     mbn_8x8 -= 2 * s8x8;
 624                     mbn_4x4 -= 4 * s4x4;
 625                 }
 626
 627                 /* *** Get bS for each 4px for the current edge *** */
 628                 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
 629                 {
 630                     bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
 631                 }
 632                 else
 633                 {
 634                     int i;
 635                     for( i = 0; i < 4; i++ )
 636                     {
 637                         int x  = i_dir == 0 ? i_edge : i;
 638                         int y  = i_dir == 0 ? i      : i_edge;
 639                         int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
 640                         int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
 641
 642                         if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
 643                             h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
 644                         {
 645                             bS[i] = 2;
 646                         }
 647                         else
 648                         {
 649                             /* FIXME: A given frame may occupy more than one position in
 650                              * the reference list. So we should compare the frame numbers,
 651                              * not the indices in the ref list.
 652                              * No harm yet, as we don't generate that case.*/
 653
 654                             int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
 655                             int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
 656                             int i4p= mb_4x4+x+y*s4x4;
 657                             int i4q= mbn_4x4+xn+yn*s4x4;
 658                             int l;
 659
 660                             bS[i] = 0;
 661
 662                             for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
 663                             {
 664                                 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
 665                                     abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
 666                                     abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
 667                                 {
 668                                     bS[i] = 1;
 669                                     break;
 670                                 }
 671                             }
 672                         }
 673                     }
 674                 }
 675
 676                 /* *** filter *** */
 677                 /* Y plane */
 678                 i_qp = h->mb.qp[mb_xy];
 679                 i_qpn= h->mb.qp[mbn_xy];
 680
 681                 if( i_dir == 0 )
 682                 {
 683                     /* vertical edge */
 684                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
 685                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 686                                   h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
 687                     if( !(i_edge & 1) )
 688                     {
 689                         /* U/V planes */
 690                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 691                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 692                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
 693                                       i_stride2[1], bS, i_qpc, 1,
 694                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 695                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
 696                                       i_stride2[2], bS, i_qpc, 1,
 697                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 698                     }
 699                 }
 700                 else
 701                 {
 702                     /* horizontal edge */
 703                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
 704                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 705                                   h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
 706                     /* U/V planes */
 707                     if( !(i_edge & 1) )
 708                     {
 709                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 710                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 711                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
 712                                       i_stride2[1], bS, i_qpc, 1,
 713                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 714                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
 715                                       i_stride2[2], bS, i_qpc, 1,
 716                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 717                     }
 718                 }
 719             }
 720         }
 721
 722         /* next mb */
 723         if( !b_interlaced || (mb_y&1) )
 724             mb_x++;
 725         mb_y ^= b_interlaced;
 726     }
 727
 728     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 729         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
 730 }
 731
 732 void x264_frame_deblock( x264_t *h )
 733 {
 734     int mb_y;
 735     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
 736         x264_frame_deblock_row( h, mb_y );
 737 }
 738
 739 #ifdef HAVE_MMX
 740 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 741 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 742 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 743 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 744
 745 #ifdef ARCH_X86_64
 746 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 747 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 748 #else
 749 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 750 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 751
 752 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 753 {
 754     x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
 755     x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
 756 }
 757 #endif
 758 #endif
 759
 760 #ifdef ARCH_PPC
 761 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 762 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 763 #endif // ARCH_PPC
 764
 765 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
 766 {
 767     pf->deblock_v_luma = deblock_v_luma_c;
 768     pf->deblock_h_luma = deblock_h_luma_c;
 769     pf->deblock_v_chroma = deblock_v_chroma_c;
 770     pf->deblock_h_chroma = deblock_h_chroma_c;
 771     pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
 772     pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
 773     pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
 774     pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
 775
 776 #ifdef HAVE_MMX
 777     if( cpu&X264_CPU_MMXEXT )
 778     {
 779         pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
 780         pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
 781         pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
 782         pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
 783
 784 #ifdef ARCH_X86_64
 785         if( cpu&X264_CPU_SSE2 )
 786         {
 787             pf->deblock_v_luma = x264_deblock_v_luma_sse2;
 788             pf->deblock_h_luma = x264_deblock_h_luma_sse2;
 789         }
 790 #else
 791         pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
 792         pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
 793 #endif
 794     }
 795 #endif
 796
 797 #ifdef ARCH_PPC
 798     if( cpu&X264_CPU_ALTIVEC )
 799     {
 800         pf->deblock_v_luma = x264_deblock_v_luma_altivec;
 801         pf->deblock_h_luma = x264_deblock_h_luma_altivec;
 802    }
 803 #endif // ARCH_PPC
 804 }
 805
 806
 807 /* threading */
 808
 809 #ifdef HAVE_PTHREAD
 810 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 811 {
 812     pthread_mutex_lock( &frame->mutex );
 813     frame->i_lines_completed = i_lines_completed;
 814     pthread_cond_broadcast( &frame->cv );
 815     pthread_mutex_unlock( &frame->mutex );
 816 }
 817
 818 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 819 {
 820     pthread_mutex_lock( &frame->mutex );
 821     while( frame->i_lines_completed < i_lines_completed )
 822         pthread_cond_wait( &frame->cv, &frame->mutex );
 823     pthread_mutex_unlock( &frame->mutex );
 824 }
 825
 826 #else
 827 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 828 {}
 829 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 830 {}
 831 #endif
 832
 833
 834 /* list operators */
 835
 836 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
 837 {
 838     int i = 0;
 839     while( list[i] ) i++;
 840     list[i] = frame;
 841 }
 842
 843 x264_frame_t *x264_frame_pop( x264_frame_t **list )
 844 {
 845     x264_frame_t *frame;
 846     int i = 0;
 847     assert( list[0] );
 848     while( list[i+1] ) i++;
 849     frame = list[i];
 850     list[i] = NULL;
 851     return frame;
 852 }
 853
 854 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
 855 {
 856     int i = 0;
 857     while( list[i] ) i++;
 858     while( i-- )
 859         list[i+1] = list[i];
 860     list[0] = frame;
 861 }
 862
 863 x264_frame_t *x264_frame_shift( x264_frame_t **list )
 864 {
 865     x264_frame_t *frame = list[0];
 866     int i;
 867     for( i = 0; list[i]; i++ )
 868         list[i] = list[i+1];
 869     assert(frame);
 870     return frame;
 871 }
 872
 873 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
 874 {
 875     assert( frame->i_reference_count > 0 );
 876     frame->i_reference_count--;
 877     if( frame->i_reference_count == 0 )
 878         x264_frame_push( h->frames.unused, frame );
 879     assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
 880 }
 881
 882 x264_frame_t *x264_frame_pop_unused( x264_t *h )
 883 {
 884     x264_frame_t *frame;
 885     if( h->frames.unused[0] )
 886         frame = x264_frame_pop( h->frames.unused );
 887     else
 888         frame = x264_frame_new( h );
 889     assert( frame->i_reference_count == 0 );
 890     frame->i_reference_count = 1;
 891     return frame;
 892 }
 893
 894 void x264_frame_sort( x264_frame_t **list, int b_dts )
 895 {
 896     int i, b_ok;
 897     do {
 898         b_ok = 1;
 899         for( i = 0; list[i+1]; i++ )
 900         {
 901             int dtype = list[i]->i_type - list[i+1]->i_type;
 902             int dtime = list[i]->i_frame - list[i+1]->i_frame;
 903             int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
 904                              : dtime > 0;
 905             if( swap )
 906             {
 907                 XCHG( x264_frame_t*, list[i], list[i+1] );
 908                 b_ok = 0;
 909             }
 910         }
 911     } while( !b_ok );
 912 }