git.sesse.net Git - x264/blob - common/frame.c

   1 /*****************************************************************************
   2  * frame.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <unistd.h>
  27
  28 #include "common.h"
  29
  30 #define PADH 32
  31 #define PADV 32
  32
  33 x264_frame_t *x264_frame_new( x264_t *h )
  34 {
  35     x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
  36     int i, j;
  37
  38     int i_mb_count = h->mb.i_mb_count;
  39     int i_stride;
  40     int i_lines;
  41     int i_padv = PADV << h->param.b_interlaced;
  42
  43     if( !frame ) return NULL;
  44
  45     memset( frame, 0, sizeof(x264_frame_t) );
  46
  47     /* allocate frame data (+64 for extra data for me) */
  48     i_stride = ( ( h->param.i_width  + 15 ) & -16 )+ 2*PADH;
  49     i_lines  = ( ( h->param.i_height + 15 ) & -16 );
  50     if( h->param.b_interlaced )
  51         i_lines = ( i_lines + 31 ) & -32;
  52
  53     frame->i_plane = 3;
  54     for( i = 0; i < 3; i++ )
  55     {
  56         int i_divh = 1;
  57         int i_divw = 1;
  58         if( i > 0 )
  59         {
  60             if( h->param.i_csp == X264_CSP_I420 )
  61                 i_divh = i_divw = 2;
  62             else if( h->param.i_csp == X264_CSP_I422 )
  63                 i_divw = 2;
  64         }
  65         frame->i_stride[i] = i_stride / i_divw;
  66         frame->i_lines[i] = i_lines / i_divh;
  67         CHECKED_MALLOC( frame->buffer[i],
  68                         frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
  69
  70         frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
  71                           frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
  72     }
  73     frame->i_stride[3] = 0;
  74     frame->i_lines[3] = 0;
  75     frame->buffer[3] = NULL;
  76     frame->plane[3] = NULL;
  77
  78     frame->filtered[0] = frame->plane[0];
  79     for( i = 0; i < 3; i++ )
  80     {
  81         CHECKED_MALLOC( frame->buffer[4+i],
  82                         frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
  83         frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
  84                                 frame->i_stride[0] * i_padv + PADH;
  85     }
  86
  87     if( h->frames.b_have_lowres )
  88     {
  89         frame->i_stride_lowres = frame->i_stride[0]/2 + PADH;
  90         frame->i_lines_lowres = frame->i_lines[0]/2;
  91         for( i = 0; i < 4; i++ )
  92         {
  93             CHECKED_MALLOC( frame->buffer_lowres[i],
  94                             frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
  95             frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
  96                                 frame->i_stride_lowres * i_padv + PADH;
  97         }
  98     }
  99
 100     if( h->param.analyse.i_me_method == X264_ME_ESA )
 101     {
 102         CHECKED_MALLOC( frame->buffer[7],
 103                         2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
 104         frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
 105     }
 106
 107     frame->i_poc = -1;
 108     frame->i_type = X264_TYPE_AUTO;
 109     frame->i_qpplus1 = 0;
 110     frame->i_pts = -1;
 111     frame->i_frame = -1;
 112     frame->i_frame_num = -1;
 113     frame->i_lines_completed = -1;
 114
 115     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
 116     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
 117     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
 118     if( h->param.i_bframe )
 119     {
 120         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
 121         CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
 122     }
 123     else
 124     {
 125         frame->mv[1]  = NULL;
 126         frame->ref[1] = NULL;
 127     }
 128
 129     CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
 130     CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
 131     for( i = 0; i < h->param.i_bframe + 2; i++ )
 132         for( j = 0; j < h->param.i_bframe + 2; j++ )
 133             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 134
 135     pthread_mutex_init( &frame->mutex, NULL );
 136     pthread_cond_init( &frame->cv, NULL );
 137
 138     return frame;
 139
 140 fail:
 141     x264_frame_delete( frame );
 142     return NULL;
 143 }
 144
 145 void x264_frame_delete( x264_frame_t *frame )
 146 {
 147     int i, j;
 148     for( i = 0; i < 8; i++ )
 149         x264_free( frame->buffer[i] );
 150     for( i = 0; i < 4; i++ )
 151         x264_free( frame->buffer_lowres[i] );
 152     for( i = 0; i < X264_BFRAME_MAX+2; i++ )
 153         for( j = 0; j < X264_BFRAME_MAX+2; j++ )
 154             x264_free( frame->i_row_satds[i][j] );
 155     x264_free( frame->i_row_bits );
 156     x264_free( frame->i_row_qp );
 157     x264_free( frame->mb_type );
 158     x264_free( frame->mv[0] );
 159     x264_free( frame->mv[1] );
 160     x264_free( frame->ref[0] );
 161     x264_free( frame->ref[1] );
 162     pthread_mutex_destroy( &frame->mutex );
 163     pthread_cond_destroy( &frame->cv );
 164     x264_free( frame );
 165 }
 166
 167 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 168 {
 169     int i_csp = src->img.i_csp & X264_CSP_MASK;
 170     dst->i_type     = src->i_type;
 171     dst->i_qpplus1  = src->i_qpplus1;
 172     dst->i_pts      = src->i_pts;
 173
 174     if( i_csp <= X264_CSP_NONE  || i_csp >= X264_CSP_MAX )
 175         x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
 176     else
 177         h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
 178 }
 179
 180
 181
 182 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
 183 {
 184 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
 185     int y;
 186     for( y = 0; y < i_height; y++ )
 187     {
 188         /* left band */
 189         memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
 190         /* right band */
 191         memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
 192     }
 193     /* upper band */
 194     if( b_pad_top )
 195     for( y = 0; y < i_padv; y++ )
 196         memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
 197     /* lower band */
 198     if( b_pad_bottom )
 199     for( y = 0; y < i_padv; y++ )
 200         memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
 201 #undef PPIXEL
 202 }
 203
 204 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 205 {
 206     int i;
 207     int b_start = !mb_y;
 208     if( mb_y & h->sh.b_mbaff )
 209         return;
 210     for( i = 0; i < frame->i_plane; i++ )
 211     {
 212         int stride = frame->i_stride[i];
 213         int width = 16*h->sps->i_mb_width >> !!i;
 214         int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
 215         int padh = PADH >> !!i;
 216         int padv = PADV >> !!i;
 217         // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
 218         uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
 219         if( b_end && !b_start )
 220             height += 4 >> (!!i + h->sh.b_mbaff);
 221         if( h->sh.b_mbaff )
 222         {
 223             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 224             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 225         }
 226         else
 227         {
 228             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 229         }
 230     }
 231 }
 232
 233 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 234 {
 235     /* during filtering, 8 extra pixels were filtered on each edge.
 236        we want to expand border from the last filtered pixel */
 237     int b_start = !mb_y;
 238     int stride = frame->i_stride[0];
 239     int width = 16*h->sps->i_mb_width + 16;
 240     int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
 241     int padh = PADH - 8;
 242     int padv = PADV - 8;
 243     int i;
 244     for( i = 1; i < 4; i++ )
 245     {
 246         // buffer: 8 luma, to match the hpel filter
 247         uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
 248         if( h->sh.b_mbaff )
 249         {
 250             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 251             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 252         }
 253         else
 254         {
 255             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 256         }
 257     }
 258 }
 259
 260 void x264_frame_expand_border_lowres( x264_frame_t *frame )
 261 {
 262     int i;
 263     for( i = 0; i < 4; i++ )
 264         plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
 265 }
 266
 267 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
 268 {
 269     int i, y;
 270     for( i = 0; i < frame->i_plane; i++ )
 271     {
 272         int i_subsample = i ? 1 : 0;
 273         int i_width = h->param.i_width >> i_subsample;
 274         int i_height = h->param.i_height >> i_subsample;
 275         int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
 276         int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
 277
 278         if( i_padx )
 279         {
 280             for( y = 0; y < i_height; y++ )
 281                 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
 282                          frame->plane[i][y*frame->i_stride[i] + i_width - 1],
 283                          i_padx );
 284         }
 285         if( i_pady )
 286         {
 287             //FIXME interlace? or just let it pad using the wrong field
 288             for( y = i_height; y < i_height + i_pady; y++ )
 289                 memcpy( &frame->plane[i][y*frame->i_stride[i]],
 290                         &frame->plane[i][(i_height-1)*frame->i_stride[i]],
 291                         i_width + i_padx );
 292         }
 293     }
 294 }
 295
 296
 297 /* Deblocking filter */
 298
 299 static const int i_alpha_table[52] =
 300 {
 301      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 302      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
 303      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
 304     25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
 305     80, 90,101,113,127,144,162,182,203,226,
 306     255, 255
 307 };
 308 static const int i_beta_table[52] =
 309 {
 310      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 311      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
 312      3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
 313      8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
 314     13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
 315     18, 18
 316 };
 317 static const int i_tc0_table[52][3] =
 318 {
 319     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 320     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
 321     { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
 322     { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
 323     { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
 324     { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
 325     { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
 326     { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
 327     { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
 328 };
 329
 330 /* From ffmpeg */
 331 static inline int clip_uint8( int a )
 332 {
 333     if (a&(~255))
 334         return (-a)>>31;
 335     else
 336         return a;
 337 }
 338
 339 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 340 {
 341     int i, d;
 342     for( i = 0; i < 4; i++ ) {
 343         if( tc0[i] < 0 ) {
 344             pix += 4*ystride;
 345             continue;
 346         }
 347         for( d = 0; d < 4; d++ ) {
 348             const int p2 = pix[-3*xstride];
 349             const int p1 = pix[-2*xstride];
 350             const int p0 = pix[-1*xstride];
 351             const int q0 = pix[ 0*xstride];
 352             const int q1 = pix[ 1*xstride];
 353             const int q2 = pix[ 2*xstride];
 354
 355             if( abs( p0 - q0 ) < alpha &&
 356                 abs( p1 - p0 ) < beta &&
 357                 abs( q1 - q0 ) < beta ) {
 358
 359                 int tc = tc0[i];
 360                 int delta;
 361
 362                 if( abs( p2 - p0 ) < beta ) {
 363                     pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
 364                     tc++;
 365                 }
 366                 if( abs( q2 - q0 ) < beta ) {
 367                     pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
 368                     tc++;
 369                 }
 370
 371                 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 372                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 373                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 374             }
 375             pix += ystride;
 376         }
 377     }
 378 }
 379 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 380 {
 381     deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
 382 }
 383 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 384 {
 385     deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
 386 }
 387
 388 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 389 {
 390     int i, d;
 391     for( i = 0; i < 4; i++ ) {
 392         const int tc = tc0[i];
 393         if( tc <= 0 ) {
 394             pix += 2*ystride;
 395             continue;
 396         }
 397         for( d = 0; d < 2; d++ ) {
 398             const int p1 = pix[-2*xstride];
 399             const int p0 = pix[-1*xstride];
 400             const int q0 = pix[ 0*xstride];
 401             const int q1 = pix[ 1*xstride];
 402
 403             if( abs( p0 - q0 ) < alpha &&
 404                 abs( p1 - p0 ) < beta &&
 405                 abs( q1 - q0 ) < beta ) {
 406
 407                 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 408                 pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
 409                 pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
 410             }
 411             pix += ystride;
 412         }
 413     }
 414 }
 415 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 416 {
 417     deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
 418 }
 419 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 420 {
 421     deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
 422 }
 423
 424 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 425 {
 426     int d;
 427     for( d = 0; d < 16; d++ ) {
 428         const int p2 = pix[-3*xstride];
 429         const int p1 = pix[-2*xstride];
 430         const int p0 = pix[-1*xstride];
 431         const int q0 = pix[ 0*xstride];
 432         const int q1 = pix[ 1*xstride];
 433         const int q2 = pix[ 2*xstride];
 434
 435         if( abs( p0 - q0 ) < alpha &&
 436             abs( p1 - p0 ) < beta &&
 437             abs( q1 - q0 ) < beta ) {
 438
 439             if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
 440                 if( abs( p2 - p0 ) < beta)
 441                 {
 442                     const int p3 = pix[-4*xstride];
 443                     /* p0', p1', p2' */
 444                     pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
 445                     pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
 446                     pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
 447                 } else {
 448                     /* p0' */
 449                     pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 450                 }
 451                 if( abs( q2 - q0 ) < beta)
 452                 {
 453                     const int q3 = pix[3*xstride];
 454                     /* q0', q1', q2' */
 455                     pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
 456                     pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
 457                     pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
 458                 } else {
 459                     /* q0' */
 460                     pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 461                 }
 462             }else{
 463                 /* p0', q0' */
 464                 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 465                 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 466             }
 467         }
 468         pix += ystride;
 469     }
 470 }
 471 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 472 {
 473     deblock_luma_intra_c( pix, stride, 1, alpha, beta );
 474 }
 475 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 476 {
 477     deblock_luma_intra_c( pix, 1, stride, alpha, beta );
 478 }
 479
 480 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 481 {
 482     int d;
 483     for( d = 0; d < 8; d++ ) {
 484         const int p1 = pix[-2*xstride];
 485         const int p0 = pix[-1*xstride];
 486         const int q0 = pix[ 0*xstride];
 487         const int q1 = pix[ 1*xstride];
 488
 489         if( abs( p0 - q0 ) < alpha &&
 490             abs( p1 - p0 ) < beta &&
 491             abs( q1 - q0 ) < beta ) {
 492
 493             pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
 494             pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
 495         }
 496
 497         pix += ystride;
 498     }
 499 }
 500 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 501 {
 502     deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
 503 }
 504 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 505 {
 506     deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
 507 }
 508
 509 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
 510                                  x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
 511 {
 512     int i;
 513     const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
 514     const int alpha = i_alpha_table[index_a];
 515     const int beta  = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
 516
 517     if( bS[0] < 4 ) {
 518         int8_t tc[4];
 519         for(i=0; i<4; i++)
 520             tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
 521         pf_inter( pix, i_stride, alpha, beta, tc );
 522     } else {
 523         pf_intra( pix, i_stride, alpha, beta );
 524     }
 525 }
 526
 527 void x264_frame_deblock_row( x264_t *h, int mb_y )
 528 {
 529     const int s8x8 = 2 * h->mb.i_mb_stride;
 530     const int s4x4 = 4 * h->mb.i_mb_stride;
 531     const int b_interlaced = h->sh.b_mbaff;
 532     const int mvy_limit = 4 >> b_interlaced;
 533     int mb_x;
 534
 535     int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
 536                          h->fdec->i_stride[1] << b_interlaced,
 537                          h->fdec->i_stride[2] << b_interlaced };
 538
 539     for( mb_x = 0; mb_x < h->sps->i_mb_width; )
 540     {
 541         const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
 542         const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
 543         const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
 544         const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
 545         const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
 546         int i_edge, i_dir;
 547
 548         int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
 549                             8*mb_y*h->fdec->i_stride[1] +  8*mb_x,
 550                             8*mb_y*h->fdec->i_stride[2] +  8*mb_x };
 551         if( b_interlaced && (mb_y&1) )
 552         {
 553             i_pix_y[0] -= 15*h->fdec->i_stride[0];
 554             i_pix_y[1] -=  7*h->fdec->i_stride[1];
 555             i_pix_y[2] -=  7*h->fdec->i_stride[2];
 556         }
 557
 558         x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
 559
 560         /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 561          * entropy coding, but per 64 coeffs for the purpose of deblocking */
 562         if( !h->param.b_cabac && b_8x8_transform )
 563         {
 564             uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy];
 565             if( nnz[0] ) nnz[0] = 0x01010101;
 566             if( nnz[1] ) nnz[1] = 0x01010101;
 567             if( nnz[2] ) nnz[2] = 0x01010101;
 568             if( nnz[3] ) nnz[3] = 0x01010101;
 569         }
 570
 571         /* i_dir == 0 -> vertical edge
 572          * i_dir == 1 -> horizontal edge */
 573         for( i_dir = 0; i_dir < 2; i_dir++ )
 574         {
 575             int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
 576             int i_qp, i_qpn;
 577
 578             for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
 579             {
 580                 int mbn_xy, mbn_8x8, mbn_4x4;
 581                 int bS[4];  /* filtering strength */
 582
 583                 if( b_8x8_transform && (i_edge&1) )
 584                     continue;
 585
 586                 mbn_xy  = i_edge > 0 ? mb_xy  : ( i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride );
 587                 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
 588                 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
 589
 590                 if( b_interlaced && i_edge == 0 && i_dir == 1 )
 591                 {
 592                     mbn_xy -= h->mb.i_mb_stride;
 593                     mbn_8x8 -= 2 * s8x8;
 594                     mbn_4x4 -= 4 * s4x4;
 595                 }
 596
 597                 /* *** Get bS for each 4px for the current edge *** */
 598                 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
 599                 {
 600                     bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
 601                 }
 602                 else
 603                 {
 604                     int i;
 605                     for( i = 0; i < 4; i++ )
 606                     {
 607                         int x  = i_dir == 0 ? i_edge : i;
 608                         int y  = i_dir == 0 ? i      : i_edge;
 609                         int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
 610                         int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
 611
 612                         if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
 613                             h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
 614                         {
 615                             bS[i] = 2;
 616                         }
 617                         else
 618                         {
 619                             /* FIXME: A given frame may occupy more than one position in
 620                              * the reference list. So we should compare the frame numbers,
 621                              * not the indices in the ref list.
 622                              * No harm yet, as we don't generate that case.*/
 623
 624                             int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
 625                             int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
 626                             int i4p= mb_4x4+x+y*s4x4;
 627                             int i4q= mbn_4x4+xn+yn*s4x4;
 628                             int l;
 629
 630                             bS[i] = 0;
 631
 632                             for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
 633                             {
 634                                 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
 635                                     abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
 636                                     abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
 637                                 {
 638                                     bS[i] = 1;
 639                                     break;
 640                                 }
 641                             }
 642                         }
 643                     }
 644                 }
 645
 646                 /* *** filter *** */
 647                 /* Y plane */
 648                 i_qp = h->mb.qp[mb_xy];
 649                 i_qpn= h->mb.qp[mbn_xy];
 650
 651                 if( i_dir == 0 )
 652                 {
 653                     /* vertical edge */
 654                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
 655                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 656                                   h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
 657                     if( !(i_edge & 1) )
 658                     {
 659                         /* U/V planes */
 660                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 661                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 662                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
 663                                       i_stride2[1], bS, i_qpc, 1,
 664                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 665                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
 666                                       i_stride2[2], bS, i_qpc, 1,
 667                                       h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
 668                     }
 669                 }
 670                 else
 671                 {
 672                     /* horizontal edge */
 673                     deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
 674                                   i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
 675                                   h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
 676                     /* U/V planes */
 677                     if( !(i_edge & 1) )
 678                     {
 679                         int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
 680                                       i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
 681                         deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
 682                                       i_stride2[1], bS, i_qpc, 1,
 683                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 684                         deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
 685                                       i_stride2[2], bS, i_qpc, 1,
 686                                       h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
 687                     }
 688                 }
 689             }
 690         }
 691
 692         /* next mb */
 693         if( !b_interlaced || (mb_y&1) )
 694             mb_x++;
 695         mb_y ^= b_interlaced;
 696     }
 697 }
 698
 699 void x264_frame_deblock( x264_t *h )
 700 {
 701     int mb_y;
 702     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
 703         x264_frame_deblock_row( h, mb_y );
 704 }
 705
 706 #ifdef HAVE_MMX
 707 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 708 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 709 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 710 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 711
 712 #ifdef ARCH_X86_64
 713 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 714 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 715 #else
 716 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 717 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 718
 719 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 720 {
 721     x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
 722     x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
 723 }
 724 #endif
 725 #endif
 726
 727 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
 728 {
 729     pf->deblock_v_luma = deblock_v_luma_c;
 730     pf->deblock_h_luma = deblock_h_luma_c;
 731     pf->deblock_v_chroma = deblock_v_chroma_c;
 732     pf->deblock_h_chroma = deblock_h_chroma_c;
 733     pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
 734     pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
 735     pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
 736     pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
 737
 738 #ifdef HAVE_MMX
 739     if( cpu&X264_CPU_MMXEXT )
 740     {
 741         pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
 742         pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
 743         pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
 744         pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
 745
 746 #ifdef ARCH_X86_64
 747         if( cpu&X264_CPU_SSE2 )
 748         {
 749             pf->deblock_v_luma = x264_deblock_v_luma_sse2;
 750             pf->deblock_h_luma = x264_deblock_h_luma_sse2;
 751         }
 752 #else
 753         pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
 754         pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
 755 #endif
 756     }
 757 #endif
 758 }
 759
 760
 761 /* threading */
 762
 763 #ifdef HAVE_PTHREAD
 764 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 765 {
 766     pthread_mutex_lock( &frame->mutex );
 767     frame->i_lines_completed = i_lines_completed;
 768     pthread_cond_broadcast( &frame->cv );
 769     pthread_mutex_unlock( &frame->mutex );
 770 }
 771
 772 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 773 {
 774     pthread_mutex_lock( &frame->mutex );
 775     while( frame->i_lines_completed < i_lines_completed )
 776         pthread_cond_wait( &frame->cv, &frame->mutex );
 777     pthread_mutex_unlock( &frame->mutex );
 778 }
 779
 780 #else
 781 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 782 {}
 783 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 784 {}
 785 #endif
 786
 787
 788 /* list operators */
 789
 790 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
 791 {
 792     int i = 0;
 793     while( list[i] ) i++;
 794     list[i] = frame;
 795 }
 796
 797 x264_frame_t *x264_frame_pop( x264_frame_t **list )
 798 {
 799     x264_frame_t *frame;
 800     int i = 0;
 801     assert( list[0] );
 802     while( list[i+1] ) i++;
 803     frame = list[i];
 804     list[i] = NULL;
 805     return frame;
 806 }
 807
 808 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
 809 {
 810     int i = 0;
 811     while( list[i] ) i++;
 812     while( i-- )
 813         list[i+1] = list[i];
 814     list[0] = frame;
 815 }
 816
 817 x264_frame_t *x264_frame_shift( x264_frame_t **list )
 818 {
 819     x264_frame_t *frame = list[0];
 820     int i;
 821     for( i = 0; list[i]; i++ )
 822         list[i] = list[i+1];
 823     assert(frame);
 824     return frame;
 825 }
 826
 827 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
 828 {
 829     assert( frame->i_reference_count > 0 );
 830     frame->i_reference_count--;
 831     if( frame->i_reference_count == 0 )
 832         x264_frame_push( h->frames.unused, frame );
 833     assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
 834 }
 835
 836 x264_frame_t *x264_frame_pop_unused( x264_t *h )
 837 {
 838     x264_frame_t *frame;
 839     if( h->frames.unused[0] )
 840         frame = x264_frame_pop( h->frames.unused );
 841     else
 842         frame = x264_frame_new( h );
 843     assert( frame->i_reference_count == 0 );
 844     frame->i_reference_count = 1;
 845     return frame;
 846 }
 847
 848 void x264_frame_sort( x264_frame_t **list, int b_dts )
 849 {
 850     int i, b_ok;
 851     do {
 852         b_ok = 1;
 853         for( i = 0; list[i+1]; i++ )
 854         {
 855             int dtype = list[i]->i_type - list[i+1]->i_type;
 856             int dtime = list[i]->i_frame - list[i+1]->i_frame;
 857             int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
 858                              : dtime > 0;
 859             if( swap )
 860             {
 861                 XCHG( x264_frame_t*, list[i], list[i+1] );
 862                 b_ok = 0;
 863             }
 864         }
 865     } while( !b_ok );
 866 }