git.sesse.net Git - x264/blob - common/frame.c

   1 /*****************************************************************************
   2  * frame.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include "common.h"
  25
  26 #define PADH 32
  27 #define PADV 32
  28
  29 x264_frame_t *x264_frame_new( x264_t *h )
  30 {
  31     x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
  32     int i, j;
  33
  34     int i_mb_count = h->mb.i_mb_count;
  35     int i_stride, i_width, i_lines;
  36     int i_padv = PADV << h->param.b_interlaced;
  37
  38     if( !frame ) return NULL;
  39
  40     memset( frame, 0, sizeof(x264_frame_t) );
  41
  42     /* allocate frame data (+64 for extra data for me) */
  43     i_width  = ( ( h->param.i_width  + 15 ) & -16 );
  44     i_stride = i_width + 2*PADH;
  45     i_lines  = ( ( h->param.i_height + 15 ) & -16 );
  46     if( h->param.b_interlaced )
  47         i_lines = ( i_lines + 31 ) & -32;
  48
  49     frame->i_plane = 3;
  50     for( i = 0; i < 3; i++ )
  51     {
  52         int i_divh = 1;
  53         int i_divw = 1;
  54         if( i > 0 )
  55         {
  56             if( h->param.i_csp == X264_CSP_I420 )
  57                 i_divh = i_divw = 2;
  58             else if( h->param.i_csp == X264_CSP_I422 )
  59                 i_divw = 2;
  60         }
  61         frame->i_stride[i] = i_stride / i_divw;
  62         frame->i_width[i] = i_width / i_divw;
  63         frame->i_lines[i] = i_lines / i_divh;
  64         CHECKED_MALLOC( frame->buffer[i],
  65                         frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
  66
  67         frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
  68                           frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
  69     }
  70
  71     frame->filtered[0] = frame->plane[0];
  72     for( i = 0; i < 3; i++ )
  73     {
  74         CHECKED_MALLOC( frame->buffer[4+i],
  75                         frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
  76         frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
  77                                 frame->i_stride[0] * i_padv + PADH;
  78     }
  79
  80     if( h->frames.b_have_lowres )
  81     {
  82         frame->i_width_lowres = frame->i_width[0]/2;
  83         frame->i_stride_lowres = frame->i_width_lowres + 2*PADH;
  84         frame->i_lines_lowres = frame->i_lines[0]/2;
  85         for( i = 0; i < 4; i++ )
  86         {
  87             CHECKED_MALLOC( frame->buffer_lowres[i],
  88                             frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
  89             frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
  90                                 frame->i_stride_lowres * i_padv + PADH;
  91         }
  92     }
  93
  94     if( h->param.analyse.i_me_method == X264_ME_ESA )
  95     {
  96         CHECKED_MALLOC( frame->buffer[7],
  97                         2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
  98         frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
  99     }
 100
 101     frame->i_poc = -1;
 102     frame->i_type = X264_TYPE_AUTO;
 103     frame->i_qpplus1 = 0;
 104     frame->i_pts = -1;
 105     frame->i_frame = -1;
 106     frame->i_frame_num = -1;
 107     frame->i_lines_completed = -1;
 108
 109     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
 110     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
 111     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
 112     if( h->param.i_bframe )
 113     {
 114         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
 115         CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
 116     }
 117     else
 118     {
 119         frame->mv[1]  = NULL;
 120         frame->ref[1] = NULL;
 121     }
 122
 123     CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
 124     CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
 125     for( i = 0; i < h->param.i_bframe + 2; i++ )
 126         for( j = 0; j < h->param.i_bframe + 2; j++ )
 127             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 128
 129     x264_pthread_mutex_init( &frame->mutex, NULL );
 130     x264_pthread_cond_init( &frame->cv, NULL );
 131
 132     return frame;
 133
 134 fail:
 135     x264_frame_delete( frame );
 136     return NULL;
 137 }
 138
 139 void x264_frame_delete( x264_frame_t *frame )
 140 {
 141     int i, j;
 142     for( i = 0; i < 8; i++ )
 143         x264_free( frame->buffer[i] );
 144     for( i = 0; i < 4; i++ )
 145         x264_free( frame->buffer_lowres[i] );
 146     for( i = 0; i < X264_BFRAME_MAX+2; i++ )
 147         for( j = 0; j < X264_BFRAME_MAX+2; j++ )
 148             x264_free( frame->i_row_satds[i][j] );
 149     x264_free( frame->i_row_bits );
 150     x264_free( frame->i_row_qp );
 151     x264_free( frame->mb_type );
 152     x264_free( frame->mv[0] );
 153     x264_free( frame->mv[1] );
 154     x264_free( frame->ref[0] );
 155     x264_free( frame->ref[1] );
 156     x264_pthread_mutex_destroy( &frame->mutex );
 157     x264_pthread_cond_destroy( &frame->cv );
 158     x264_free( frame );
 159 }
 160
 161 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 162 {
 163     int i_csp = src->img.i_csp & X264_CSP_MASK;
 164     dst->i_type     = src->i_type;
 165     dst->i_qpplus1  = src->i_qpplus1;
 166     dst->i_pts      = src->i_pts;
 167
 168     if( i_csp <= X264_CSP_NONE  || i_csp >= X264_CSP_MAX )
 169         x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
 170     else
 171         h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
 172 }
 173
 174
 175
 176 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
 177 {
 178 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
 179     int y;
 180     for( y = 0; y < i_height; y++ )
 181     {
 182         /* left band */
 183         memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
 184         /* right band */
 185         memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
 186     }
 187     /* upper band */
 188     if( b_pad_top )
 189     for( y = 0; y < i_padv; y++ )
 190         memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
 191     /* lower band */
 192     if( b_pad_bottom )
 193     for( y = 0; y < i_padv; y++ )
 194         memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
 195 #undef PPIXEL
 196 }
 197
 198 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 199 {
 200     int i;
 201     int b_start = !mb_y;
 202     if( mb_y & h->sh.b_mbaff )
 203         return;
 204     for( i = 0; i < frame->i_plane; i++ )
 205     {
 206         int stride = frame->i_stride[i];
 207         int width = 16*h->sps->i_mb_width >> !!i;
 208         int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
 209         int padh = PADH >> !!i;
 210         int padv = PADV >> !!i;
 211         // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
 212         uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
 213         if( b_end && !b_start )
 214             height += 4 >> (!!i + h->sh.b_mbaff);
 215         if( h->sh.b_mbaff )
 216         {
 217             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 218             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 219         }
 220         else
 221         {
 222             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 223         }
 224     }
 225 }
 226
 227 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 228 {
 229     /* during filtering, 8 extra pixels were filtered on each edge.
 230        we want to expand border from the last filtered pixel */
 231     int b_start = !mb_y;
 232     int stride = frame->i_stride[0];
 233     int width = 16*h->sps->i_mb_width + 16;
 234     int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
 235     int padh = PADH - 8;
 236     int padv = PADV - 8;
 237     int i;
 238     for( i = 1; i < 4; i++ )
 239     {
 240         // buffer: 8 luma, to match the hpel filter
 241         uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
 242         if( h->sh.b_mbaff )
 243         {
 244             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 245             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 246         }
 247         else
 248         {
 249             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 250         }
 251     }
 252 }
 253
 254 void x264_frame_expand_border_lowres( x264_frame_t *frame )
 255 {
 256     int i;
 257     for( i = 0; i < 4; i++ )
 258         plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
 259 }
 260
 261 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
 262 {
 263     int i, y;
 264     for( i = 0; i < frame->i_plane; i++ )
 265     {
 266         int i_subsample = i ? 1 : 0;
 267         int i_width = h->param.i_width >> i_subsample;
 268         int i_height = h->param.i_height >> i_subsample;
 269         int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
 270         int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
 271
 272         if( i_padx )
 273         {
 274             for( y = 0; y < i_height; y++ )
 275                 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
 276                          frame->plane[i][y*frame->i_stride[i] + i_width - 1],
 277                          i_padx );
 278         }
 279         if( i_pady )
 280         {
 281             //FIXME interlace? or just let it pad using the wrong field
 282             for( y = i_height; y < i_height + i_pady; y++ )
 283                 memcpy( &frame->plane[i][y*frame->i_stride[i]],
 284                         &frame->plane[i][(i_height-1)*frame->i_stride[i]],
 285                         i_width + i_padx );
 286         }
 287     }
 288 }
 289
 290
 291 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 292  * entropy coding, but per 64 coeffs for the purpose of deblocking */
 293 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 294 {
 295     uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 296     int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
 297     int x;
 298     for( x=0; x<h->sps->i_mb_width; x++ )
 299     {
 300         memcpy( buf+x, src+x, 16 );
 301         if( transform[x] )
 302         {
 303             if( src[x][0] ) src[x][0] = 0x01010101;
 304             if( src[x][1] ) src[x][1] = 0x01010101;
 305             if( src[x][2] ) src[x][2] = 0x01010101;
 306             if( src[x][3] ) src[x][3] = 0x01010101;
 307         }
 308     }
 309 }
 310
 311 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 312 {
 313     uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 314     int x;
 315     for( x=0; x<h->sps->i_mb_width; x++ )
 316         memcpy( dst+x, buf+x, 16 );
 317 }
 318
 319 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
 320 {
 321     func( h, mb_y, buf );
 322     if( mb_y > 0 )
 323         func( h, mb_y-1, buf + h->sps->i_mb_width );
 324     if( h->sh.b_mbaff )
 325     {
 326         func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
 327         if( mb_y > 0 )
 328             func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
 329     }
 330 }
 331
 332
 333 /* Deblocking filter */
 334
 335 static const int i_alpha_table[52] =
 336 {
 337      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 338      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
 339      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
 340     25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
 341     80, 90,101,113,127,144,162,182,203,226,
 342     255, 255
 343 };
 344 static const int i_beta_table[52] =
 345 {
 346      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 347      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
 348      3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
 349      8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
 350     13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
 351     18, 18
 352 };
 353 static const int i_tc0_table[52][3] =
 354 {
 355     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 356     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 357     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
 358     { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
 359     { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
 360     { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
 361     { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
 362     { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
 363     { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
 364 };
 365
 366 /* From ffmpeg */
 367 static inline int clip_uint8( int a )
 368 {
 369     if (a&(~255))
 370         return (-a)>>31;
 371     else
 372         return a;
 373 }
 374
 375 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 376 {
 377     int i, d;
 378     for( i = 0; i < 4; i++ ) {
 379         if( tc0[i] < 0 ) {
 380             pix += 4*ystride;
 381             continue;
 382         }
 383         for( d = 0; d < 4; d++ ) {
 384             const int p2 = pix[-3*xstride];
 385             const int p1 = pix[-2*xstride];
 386             const int p0 = pix[-1*xstride];
 387             const int q0 = pix[ 0*xstride];
 388             const int q1 = pix[ 1*xstride];
 389             const int q2 = pix[ 2*xstride];
 390
 391             if( abs( p0 - q0 ) < alpha &&
 392                 abs( p1 - p0 ) < beta &&
 393                 abs( q1 - q0 ) < beta ) {
 394
 395                 int tc = tc0[i];
 396                 int delta;
 397
 398                 if( abs( p2 - p0 ) < beta ) {
 399                     pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
 400                     tc++;
 401                 }
 402                 if( abs( q2 - q0 ) < beta ) {
 403                     pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
 404                     tc++;
 405                 }
 406
 407                 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 408                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 409                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 410             }
 411             pix += ystride;
 412         }
 413     }
 414 }
 415 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 416 {
 417     deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
 418 }
 419 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 420 {
 421     deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
 422 }
 423
 424 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 425 {
 426     int i, d;
 427     for( i = 0; i < 4; i++ ) {
 428         const int tc = tc0[i];
 429         if( tc <= 0 ) {
 430             pix += 2*ystride;
 431             continue;
 432         }
 433         for( d = 0; d < 2; d++ ) {
 434             const int p1 = pix[-2*xstride];
 435             const int p0 = pix[-1*xstride];
 436             const int q0 = pix[ 0*xstride];
 437             const int q1 = pix[ 1*xstride];
 438
 439             if( abs( p0 - q0 ) < alpha &&
 440                 abs( p1 - p0 ) < beta &&
 441                 abs( q1 - q0 ) < beta ) {
 442
 443                 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 444                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 445                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 446             }
 447             pix += ystride;
 448         }
 449     }
 450 }
 451 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 452 {
 453     deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
 454 }
 455 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 456 {
 457     deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
 458 }
 459
 460 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 461 {
 462     int d;
 463     for( d = 0; d < 16; d++ ) {
 464         const int p2 = pix[-3*xstride];
 465         const int p1 = pix[-2*xstride];
 466         const int p0 = pix[-1*xstride];
 467         const int q0 = pix[ 0*xstride];
 468         const int q1 = pix[ 1*xstride];
 469         const int q2 = pix[ 2*xstride];
 470
 471         if( abs( p0 - q0 ) < alpha &&
 472             abs( p1 - p0 ) < beta &&
 473             abs( q1 - q0 ) < beta ) {
 474
 475             if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
 476                 if( abs( p2 - p0 ) < beta)
 477                 {
 478                     const int p3 = pix[-4*xstride];
 479                     /* p0', p1', p2' */
 480                     pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
 481                     pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
 482                     pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
 483                 } else {
 484                     /* p0' */
 485                     pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 486                 }
 487                 if( abs( q2 - q0 ) < beta)
 488                 {
 489                     const int q3 = pix[3*xstride];
 490                     /* q0', q1', q2' */
 491                     pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
 492                     pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
 493                     pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
 494                 } else {
 495                     /* q0' */
 496                     pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 497                 }
 498             }else{
 499                 /* p0', q0' */
 500                 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 501                 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 502             }
 503         }
 504         pix += ystride;
 505     }
 506 }
 507 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 508 {
 509     deblock_luma_intra_c( pix, stride, 1, alpha, beta );
 510 }
 511 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 512 {
 513     deblock_luma_intra_c( pix, 1, stride, alpha, beta );
 514 }
 515
 516 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 517 {
 518     int d;
 519     for( d = 0; d < 8; d++ ) {
 520         const int p1 = pix[-2*xstride];
 521         const int p0 = pix[-1*xstride];
 522         const int q0 = pix[ 0*xstride];
 523         const int q1 = pix[ 1*xstride];
 524
 525         if( abs( p0 - q0 ) < alpha &&
 526             abs( p1 - p0 ) < beta &&
 527             abs( q1 - q0 ) < beta ) {
 528
 529             pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
 530             pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
 531         }
 532
 533         pix += ystride;
 534     }
 535 }
 536 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 537 {
 538     deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
 539 }
 540 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 541 {
 542     deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
 543 }
 544
 545 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
 546                                  x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
 547 {
 548     int i;
 549     const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
 550     const int alpha = i_alpha_table[index_a];
 551     const int beta  = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
 552
 553     if( bS[0] < 4 ) {
 554         int8_t tc[4];
 555         for(i=0; i<4; i++)
 556             tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
 557         pf_inter( pix, i_stride, alpha, beta, tc );
 558     } else {
 559         pf_intra( pix, i_stride, alpha, beta );
 560     }
 561 }
 562
 563 void x264_frame_deblock_row( x264_t *h, int mb_y )
 564 {
 565     const int s8x8 = 2 * h->mb.i_mb_stride;
 566     const int s4x4 = 4 * h->mb.i_mb_stride;
 567     const int b_interlaced = h->sh.b_mbaff;
 568     const int mvy_limit = 4 >> b_interlaced;
 569     int mb_x;
 570
 571     int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
 572                          h->fdec->i_stride[1] << b_interlaced,
 573                          h->fdec->i_stride[2] << b_interlaced };
 574
 575     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 576         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
 577
 578     for( mb_x = 0; mb_x < h->sps->i_mb_width; )
 579     {
 580         const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
 581         const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
 582         const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
 583         const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
 584         const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
 585         int i_edge, i_dir;
 586
 587         int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
 588                             8*mb_y*h->fdec->i_stride[1] +  8*mb_x,
 589                             8*mb_y*h->fdec->i_stride[2] +  8*mb_x };
 590         if( b_interlaced && (mb_y&1) )
 591         {
 592             i_pix_y[0] -= 15*h->fdec->i_stride[0];
 593             i_pix_y[1] -=  7*h->fdec->i_stride[1];
 594             i_pix_y[2] -=  7*h->fdec->i_stride[2];
 595         }
 596
 597         x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
 598
 599         /* i_dir == 0 -> vertical edge
 600          * i_dir == 1 -> horizontal edge */
 601         for( i_dir = 0; i_dir < 2; i_dir++ )
 602         {
 603             int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
 604             int i_qp, i_qpn;
 605
 606             for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
 607             {
 608                 int mbn_xy, mbn_8x8, mbn_4x4;
 609                 int bS[4];  /* filtering strength */
 610
 611                 if( b_8x8_transform && (i_edge&1) )
 612                     continue;
 613
 614                 mbn_xy  = i_edge > 0 ? mb_xy  : ( i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride );
 615                 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
 616                 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
 617
 618                 if( b_interlaced && i_edge == 0 && i_dir == 1 )
 619                 {
 620                     mbn_xy -= h->mb.i_mb_stride;
 621                     mbn_8x8 -= 2 * s8x8;
 622                     mbn_4x4 -= 4 * s4x4;
 623                 }
 624
 625                 /* *** Get bS for each 4px for the current edge *** */
 626                 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
 627                 {
 628                     bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
 629                 }
 630                 else
 631                 {
 632                     int i;
 633                     for( i = 0; i < 4; i++ )
 634                     {
 635                         int x  = i_dir == 0 ? i_edge : i;
 636                         int y  = i_dir == 0 ? i      : i_edge;
 637                         int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
 638                         int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
 639
 640                         if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
 641                             h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
 642                         {
 643                             bS[i] = 2;
 644                         }
 645                         else
 646                         {
 647                             /* FIXME: A given frame may occupy more than one position in
 648                              * the reference list. So we should compare the frame numbers,
 649                              * not the indices in the ref list.
 650                              * No harm yet, as we don't generate that case.*/
 651
 652                             int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
 653                             int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
 654                             int i4p= mb_4x4+x+y*s4x4;
 655                             int i4q= mbn_4x4+xn+yn*s4x4;
 656                             int l;
 657
 658                             bS[i] = 0;
 659
 660                             for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
 661                             {
 662                                 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
 663                                     abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
 664                                     abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
 665                                 {
 666                                     bS[i] = 1;
 667                                     break;
 668                                 }
 669                             }
 670                         }
 671                     }
 672                 }
 673
 674                 /* *** filter *** */
 675                 /* Y plane */
 676                 i_qp = h->mb.qp[mb_xy];
 677                 i_qpn= h->mb.qp[mbn_xy];
 678
 679                 if( i_dir == 0 )
 680                 {
 681                     /* vertical edge */
 682                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
 683                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 684                                   h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
 685                     if( !(i_edge & 1) )
 686                     {
 687                         /* U/V planes */
 688                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 689                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 690                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
 691                                       i_stride2[1], bS, i_qpc, 1,
 692                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 693                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
 694                                       i_stride2[2], bS, i_qpc, 1,
 695                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 696                     }
 697                 }
 698                 else
 699                 {
 700                     /* horizontal edge */
 701                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
 702                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 703                                   h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
 704                     /* U/V planes */
 705                     if( !(i_edge & 1) )
 706                     {
 707                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 708                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 709                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
 710                                       i_stride2[1], bS, i_qpc, 1,
 711                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 712                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
 713                                       i_stride2[2], bS, i_qpc, 1,
 714                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 715                     }
 716                 }
 717             }
 718         }
 719
 720         /* next mb */
 721         if( !b_interlaced || (mb_y&1) )
 722             mb_x++;
 723         mb_y ^= b_interlaced;
 724     }
 725
 726     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 727         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
 728 }
 729
 730 void x264_frame_deblock( x264_t *h )
 731 {
 732     int mb_y;
 733     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
 734         x264_frame_deblock_row( h, mb_y );
 735 }
 736
 737 #ifdef HAVE_MMX
 738 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 739 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 740 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 741 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 742
 743 #ifdef ARCH_X86_64
 744 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 745 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 746 #else
 747 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 748 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 749
 750 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 751 {
 752     x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
 753     x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
 754 }
 755 #endif
 756 #endif
 757
 758 #ifdef ARCH_PPC
 759 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 760 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 761 #endif // ARCH_PPC
 762
 763 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
 764 {
 765     pf->deblock_v_luma = deblock_v_luma_c;
 766     pf->deblock_h_luma = deblock_h_luma_c;
 767     pf->deblock_v_chroma = deblock_v_chroma_c;
 768     pf->deblock_h_chroma = deblock_h_chroma_c;
 769     pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
 770     pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
 771     pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
 772     pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
 773
 774 #ifdef HAVE_MMX
 775     if( cpu&X264_CPU_MMXEXT )
 776     {
 777         pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
 778         pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
 779         pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
 780         pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
 781
 782 #ifdef ARCH_X86_64
 783         if( cpu&X264_CPU_SSE2 )
 784         {
 785             pf->deblock_v_luma = x264_deblock_v_luma_sse2;
 786             pf->deblock_h_luma = x264_deblock_h_luma_sse2;
 787         }
 788 #else
 789         pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
 790         pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
 791 #endif
 792     }
 793 #endif
 794
 795 #ifdef ARCH_PPC
 796     if( cpu&X264_CPU_ALTIVEC )
 797     {
 798         pf->deblock_v_luma = x264_deblock_v_luma_altivec;
 799         pf->deblock_h_luma = x264_deblock_h_luma_altivec;
 800    }
 801 #endif // ARCH_PPC
 802 }
 803
 804
 805 /* threading */
 806
 807 #ifdef HAVE_PTHREAD
 808 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 809 {
 810     x264_pthread_mutex_lock( &frame->mutex );
 811     frame->i_lines_completed = i_lines_completed;
 812     x264_pthread_cond_broadcast( &frame->cv );
 813     x264_pthread_mutex_unlock( &frame->mutex );
 814 }
 815
 816 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 817 {
 818     x264_pthread_mutex_lock( &frame->mutex );
 819     while( frame->i_lines_completed < i_lines_completed )
 820         x264_pthread_cond_wait( &frame->cv, &frame->mutex );
 821     x264_pthread_mutex_unlock( &frame->mutex );
 822 }
 823
 824 #else
 825 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 826 {}
 827 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 828 {}
 829 #endif
 830
 831
 832 /* list operators */
 833
 834 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
 835 {
 836     int i = 0;
 837     while( list[i] ) i++;
 838     list[i] = frame;
 839 }
 840
 841 x264_frame_t *x264_frame_pop( x264_frame_t **list )
 842 {
 843     x264_frame_t *frame;
 844     int i = 0;
 845     assert( list[0] );
 846     while( list[i+1] ) i++;
 847     frame = list[i];
 848     list[i] = NULL;
 849     return frame;
 850 }
 851
 852 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
 853 {
 854     int i = 0;
 855     while( list[i] ) i++;
 856     while( i-- )
 857         list[i+1] = list[i];
 858     list[0] = frame;
 859 }
 860
 861 x264_frame_t *x264_frame_shift( x264_frame_t **list )
 862 {
 863     x264_frame_t *frame = list[0];
 864     int i;
 865     for( i = 0; list[i]; i++ )
 866         list[i] = list[i+1];
 867     assert(frame);
 868     return frame;
 869 }
 870
 871 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
 872 {
 873     assert( frame->i_reference_count > 0 );
 874     frame->i_reference_count--;
 875     if( frame->i_reference_count == 0 )
 876         x264_frame_push( h->frames.unused, frame );
 877     assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
 878 }
 879
 880 x264_frame_t *x264_frame_pop_unused( x264_t *h )
 881 {
 882     x264_frame_t *frame;
 883     if( h->frames.unused[0] )
 884         frame = x264_frame_pop( h->frames.unused );
 885     else
 886         frame = x264_frame_new( h );
 887     assert( frame->i_reference_count == 0 );
 888     frame->i_reference_count = 1;
 889     return frame;
 890 }
 891
 892 void x264_frame_sort( x264_frame_t **list, int b_dts )
 893 {
 894     int i, b_ok;
 895     do {
 896         b_ok = 1;
 897         for( i = 0; list[i+1]; i++ )
 898         {
 899             int dtype = list[i]->i_type - list[i+1]->i_type;
 900             int dtime = list[i]->i_frame - list[i+1]->i_frame;
 901             int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
 902                              : dtime > 0;
 903             if( swap )
 904             {
 905                 XCHG( x264_frame_t*, list[i], list[i+1] );
 906                 b_ok = 0;
 907             }
 908         }
 909     } while( !b_ok );
 910 }