git.sesse.net Git - x264/blob - common/frame.c

   1 /*****************************************************************************
   2  * frame.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2003-2008 x264 project
   5  *
   6  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   7  *          Loren Merritt <lorenm@u.washington.edu>
   8  *          Fiona Glaser <fiona@x264.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include "common.h"
  26
  27 #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
  28
  29 x264_frame_t *x264_frame_new( x264_t *h )
  30 {
  31     x264_frame_t *frame;
  32     int i, j;
  33
  34     int i_mb_count = h->mb.i_mb_count;
  35     int i_stride, i_width, i_lines;
  36     int i_padv = PADV << h->param.b_interlaced;
  37     int luma_plane_size;
  38     int chroma_plane_size;
  39     int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
  40
  41     CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
  42
  43     /* allocate frame data (+64 for extra data for me) */
  44     i_width  = ALIGN( h->param.i_width, 16 );
  45     i_stride = ALIGN( i_width + 2*PADH, align );
  46     i_lines  = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
  47
  48     frame->i_plane = 3;
  49     for( i = 0; i < 3; i++ )
  50     {
  51         frame->i_stride[i] = ALIGN( i_stride >> !!i, align );
  52         frame->i_width[i] = i_width >> !!i;
  53         frame->i_lines[i] = i_lines >> !!i;
  54     }
  55
  56     luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
  57     chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
  58     for( i = 1; i < 3; i++ )
  59     {
  60         CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
  61         frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
  62     }
  63     /* all 4 luma planes allocated together, since the cacheline split code
  64      * requires them to be in-phase wrt cacheline alignment. */
  65     if( h->param.analyse.i_subpel_refine )
  66     {
  67         CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
  68         for( i = 0; i < 4; i++ )
  69             frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
  70         frame->plane[0] = frame->filtered[0];
  71     }
  72     else
  73     {
  74         CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
  75         frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
  76     }
  77
  78     if( h->frames.b_have_lowres )
  79     {
  80         frame->i_width_lowres = frame->i_width[0]/2;
  81         frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
  82         frame->i_lines_lowres = frame->i_lines[0]/2;
  83
  84         luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
  85
  86         CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
  87         for( i = 0; i < 4; i++ )
  88             frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
  89
  90         for( j = 0; j <= !!h->param.i_bframe; j++ )
  91             for( i = 0; i <= h->param.i_bframe; i++ )
  92             {
  93                 CHECKED_MALLOCZERO( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
  94                 CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
  95             }
  96         CHECKED_MALLOC( frame->i_propagate_cost, (i_mb_count+3) * sizeof(uint16_t) );
  97         for( j = 0; j <= h->param.i_bframe+1; j++ )
  98             for( i = 0; i <= h->param.i_bframe+1; i++ )
  99             {
 100                 CHECKED_MALLOC( frame->lowres_costs[j][i], (i_mb_count+3) * sizeof(uint16_t) );
 101                 CHECKED_MALLOC( frame->lowres_inter_types[j][i], (i_mb_count+3) * sizeof(uint8_t) );
 102             }
 103         frame->i_intra_cost = frame->lowres_costs[0][0];
 104         memset( frame->i_intra_cost, -1, (i_mb_count+3) * sizeof(uint16_t) );
 105     }
 106
 107     if( h->param.analyse.i_me_method >= X264_ME_ESA )
 108     {
 109         CHECKED_MALLOC( frame->buffer[3],
 110                         frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
 111         frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
 112     }
 113
 114     frame->i_poc = -1;
 115     frame->i_type = X264_TYPE_AUTO;
 116     frame->i_qpplus1 = 0;
 117     frame->i_pts = -1;
 118     frame->i_frame = -1;
 119     frame->i_frame_num = -1;
 120     frame->i_lines_completed = -1;
 121
 122     CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
 123     CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
 124     CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
 125     if( h->param.i_bframe )
 126     {
 127         CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
 128         CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
 129     }
 130     else
 131     {
 132         frame->mv[1]  = NULL;
 133         frame->ref[1] = NULL;
 134     }
 135
 136     CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
 137     CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
 138     for( i = 0; i < h->param.i_bframe + 2; i++ )
 139         for( j = 0; j < h->param.i_bframe + 2; j++ )
 140             CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
 141
 142     if( h->param.rc.i_aq_mode )
 143     {
 144         CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
 145         CHECKED_MALLOC( frame->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
 146         if( h->frames.b_have_lowres )
 147             /* shouldn't really be initialized, just silences a valgrind false-positive in x264_mbtree_propagate_cost_sse2 */
 148             CHECKED_MALLOCZERO( frame->i_inv_qscale_factor, (h->mb.i_mb_count+3) * sizeof(uint16_t) );
 149     }
 150
 151     if( x264_pthread_mutex_init( &frame->mutex, NULL ) )
 152         goto fail;
 153     if( x264_pthread_cond_init( &frame->cv, NULL ) )
 154         goto fail;
 155
 156     return frame;
 157
 158 fail:
 159     x264_free( frame );
 160     return NULL;
 161 }
 162
 163 void x264_frame_delete( x264_frame_t *frame )
 164 {
 165     int i, j;
 166     for( i = 0; i < 4; i++ )
 167         x264_free( frame->buffer[i] );
 168     for( i = 0; i < 4; i++ )
 169         x264_free( frame->buffer_lowres[i] );
 170     for( i = 0; i < X264_BFRAME_MAX+2; i++ )
 171         for( j = 0; j < X264_BFRAME_MAX+2; j++ )
 172             x264_free( frame->i_row_satds[i][j] );
 173     for( j = 0; j < 2; j++ )
 174         for( i = 0; i <= X264_BFRAME_MAX; i++ )
 175         {
 176             x264_free( frame->lowres_mvs[j][i] );
 177             x264_free( frame->lowres_mv_costs[j][i] );
 178         }
 179     x264_free( frame->i_propagate_cost );
 180     for( j = 0; j <= X264_BFRAME_MAX+1; j++ )
 181         for( i = 0; i <= X264_BFRAME_MAX+1; i++ )
 182         {
 183             x264_free( frame->lowres_costs[j][i] );
 184             x264_free( frame->lowres_inter_types[j][i] );
 185         }
 186     x264_free( frame->f_qp_offset );
 187     x264_free( frame->f_qp_offset_aq );
 188     x264_free( frame->i_inv_qscale_factor );
 189     x264_free( frame->i_row_bits );
 190     x264_free( frame->i_row_qp );
 191     x264_free( frame->mb_type );
 192     x264_free( frame->mv[0] );
 193     x264_free( frame->mv[1] );
 194     x264_free( frame->ref[0] );
 195     x264_free( frame->ref[1] );
 196     x264_pthread_mutex_destroy( &frame->mutex );
 197     x264_pthread_cond_destroy( &frame->cv );
 198     x264_free( frame );
 199 }
 200
 201 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
 202 {
 203     int i_csp = src->img.i_csp & X264_CSP_MASK;
 204     int i;
 205     if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
 206     {
 207         x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
 208         return -1;
 209     }
 210
 211     dst->i_type     = src->i_type;
 212     dst->i_qpplus1  = src->i_qpplus1;
 213     dst->i_pts      = src->i_pts;
 214     dst->param      = src->param;
 215
 216     for( i=0; i<3; i++ )
 217     {
 218         int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
 219         uint8_t *plane = src->img.plane[s];
 220         int stride = src->img.i_stride[s];
 221         int width = h->param.i_width >> !!i;
 222         int height = h->param.i_height >> !!i;
 223         if( src->img.i_csp & X264_CSP_VFLIP )
 224         {
 225             plane += (height-1)*stride;
 226             stride = -stride;
 227         }
 228         h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
 229     }
 230     return 0;
 231 }
 232
 233
 234
 235 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
 236 {
 237 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
 238     int y;
 239     for( y = 0; y < i_height; y++ )
 240     {
 241         /* left band */
 242         memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
 243         /* right band */
 244         memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
 245     }
 246     /* upper band */
 247     if( b_pad_top )
 248     for( y = 0; y < i_padv; y++ )
 249         memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
 250     /* lower band */
 251     if( b_pad_bottom )
 252     for( y = 0; y < i_padv; y++ )
 253         memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
 254 #undef PPIXEL
 255 }
 256
 257 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 258 {
 259     int i;
 260     int b_start = !mb_y;
 261     if( mb_y & h->sh.b_mbaff )
 262         return;
 263     for( i = 0; i < frame->i_plane; i++ )
 264     {
 265         int stride = frame->i_stride[i];
 266         int width = 16*h->sps->i_mb_width >> !!i;
 267         int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
 268         int padh = PADH >> !!i;
 269         int padv = PADV >> !!i;
 270         // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
 271         uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
 272         if( b_end && !b_start )
 273             height += 4 >> (!!i + h->sh.b_mbaff);
 274         if( h->sh.b_mbaff )
 275         {
 276             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 277             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 278         }
 279         else
 280         {
 281             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 282         }
 283     }
 284 }
 285
 286 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
 287 {
 288     /* during filtering, 8 extra pixels were filtered on each edge,
 289      * but up to 3 of the horizontal ones may be wrong.
 290        we want to expand border from the last filtered pixel */
 291     int b_start = !mb_y;
 292     int stride = frame->i_stride[0];
 293     int width = 16*h->sps->i_mb_width + 8;
 294     int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
 295     int padh = PADH - 4;
 296     int padv = PADV - 8;
 297     int i;
 298     for( i = 1; i < 4; i++ )
 299     {
 300         // buffer: 8 luma, to match the hpel filter
 301         uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
 302         if( h->sh.b_mbaff )
 303         {
 304             plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
 305             plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
 306         }
 307         else
 308         {
 309             plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
 310         }
 311     }
 312 }
 313
 314 void x264_frame_expand_border_lowres( x264_frame_t *frame )
 315 {
 316     int i;
 317     for( i = 0; i < 4; i++ )
 318         plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_width_lowres, frame->i_lines_lowres, PADH, PADV, 1, 1 );
 319 }
 320
 321 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
 322 {
 323     int i, y;
 324     for( i = 0; i < frame->i_plane; i++ )
 325     {
 326         int i_subsample = i ? 1 : 0;
 327         int i_width = h->param.i_width >> i_subsample;
 328         int i_height = h->param.i_height >> i_subsample;
 329         int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
 330         int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
 331
 332         if( i_padx )
 333         {
 334             for( y = 0; y < i_height; y++ )
 335                 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
 336                          frame->plane[i][y*frame->i_stride[i] + i_width - 1],
 337                          i_padx );
 338         }
 339         if( i_pady )
 340         {
 341             //FIXME interlace? or just let it pad using the wrong field
 342             for( y = i_height; y < i_height + i_pady; y++ )
 343                 memcpy( &frame->plane[i][y*frame->i_stride[i]],
 344                         &frame->plane[i][(i_height-1)*frame->i_stride[i]],
 345                         i_width + i_padx );
 346         }
 347     }
 348 }
 349
 350
 351 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 352  * entropy coding, but per 64 coeffs for the purpose of deblocking */
 353 static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 354 {
 355     uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 356     int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
 357     int x, nnz;
 358     for( x=0; x<h->sps->i_mb_width; x++ )
 359     {
 360         memcpy( buf+x, src+x, 16 );
 361         if( transform[x] )
 362         {
 363             nnz = src[x][0] | src[x][1];
 364             src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
 365             nnz = src[x][2] | src[x][3];
 366             src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
 367         }
 368     }
 369 }
 370
 371 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
 372 {
 373     uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
 374     int x;
 375     for( x=0; x<h->sps->i_mb_width; x++ )
 376         memcpy( dst+x, buf+x, 16 );
 377 }
 378
 379 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
 380 {
 381     func( h, mb_y, buf );
 382     if( mb_y > 0 )
 383         func( h, mb_y-1, buf + h->sps->i_mb_width );
 384     if( h->sh.b_mbaff )
 385     {
 386         func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
 387         if( mb_y > 0 )
 388             func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
 389     }
 390 }
 391
 392
 393 /* Deblocking filter */
 394 static const uint8_t i_alpha_table[52+12*2] =
 395 {
 396      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 397      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 398      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
 399      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
 400     25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
 401     80, 90,101,113,127,144,162,182,203,226,
 402    255,255,
 403    255,255,255,255,255,255,255,255,255,255,255,255,
 404 };
 405 static const uint8_t i_beta_table[52+12*2] =
 406 {
 407      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 408      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 409      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
 410      3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
 411      8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
 412     13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
 413     18, 18,
 414     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
 415 };
 416 static const int8_t i_tc0_table[52+12*2][4] =
 417 {
 418     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
 419     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
 420     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
 421     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
 422     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
 423     {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
 424     {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
 425     {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
 426     {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
 427     {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
 428     {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
 429     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
 430     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
 431 };
 432 #define alpha_table(x) i_alpha_table[(x)+12]
 433 #define beta_table(x)  i_beta_table[(x)+12]
 434 #define tc0_table(x)   i_tc0_table[(x)+12]
 435
 436 /* From ffmpeg */
 437 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 438 {
 439     int i, d;
 440     for( i = 0; i < 4; i++ )
 441     {
 442         if( tc0[i] < 0 )
 443         {
 444             pix += 4*ystride;
 445             continue;
 446         }
 447         for( d = 0; d < 4; d++ )
 448         {
 449             const int p2 = pix[-3*xstride];
 450             const int p1 = pix[-2*xstride];
 451             const int p0 = pix[-1*xstride];
 452             const int q0 = pix[ 0*xstride];
 453             const int q1 = pix[ 1*xstride];
 454             const int q2 = pix[ 2*xstride];
 455
 456             if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
 457             {
 458                 int tc = tc0[i];
 459                 int delta;
 460                 if( abs( p2 - p0 ) < beta )
 461                 {
 462                     pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
 463                     tc++;
 464                 }
 465                 if( abs( q2 - q0 ) < beta )
 466                 {
 467                     pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
 468                     tc++;
 469                 }
 470
 471                 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 472                 pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
 473                 pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
 474             }
 475             pix += ystride;
 476         }
 477     }
 478 }
 479 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 480 {
 481     deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
 482 }
 483 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 484 {
 485     deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
 486 }
 487
 488 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
 489 {
 490     int i, d;
 491     for( i = 0; i < 4; i++ )
 492     {
 493         const int tc = tc0[i];
 494         if( tc <= 0 )
 495         {
 496             pix += 2*ystride;
 497             continue;
 498         }
 499         for( d = 0; d < 2; d++ )
 500         {
 501             const int p1 = pix[-2*xstride];
 502             const int p0 = pix[-1*xstride];
 503             const int q0 = pix[ 0*xstride];
 504             const int q1 = pix[ 1*xstride];
 505
 506             if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
 507             {
 508                 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
 509                 pix[-1*xstride] = x264_clip_uint8( p0 + delta );    /* p0' */
 510                 pix[ 0*xstride] = x264_clip_uint8( q0 - delta );    /* q0' */
 511             }
 512             pix += ystride;
 513         }
 514     }
 515 }
 516 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 517 {
 518     deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
 519 }
 520 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 521 {
 522     deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
 523 }
 524
 525 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 526 {
 527     int d;
 528     for( d = 0; d < 16; d++ )
 529     {
 530         const int p2 = pix[-3*xstride];
 531         const int p1 = pix[-2*xstride];
 532         const int p0 = pix[-1*xstride];
 533         const int q0 = pix[ 0*xstride];
 534         const int q1 = pix[ 1*xstride];
 535         const int q2 = pix[ 2*xstride];
 536
 537         if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
 538         {
 539             if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
 540             {
 541                 if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
 542                 {
 543                     const int p3 = pix[-4*xstride];
 544                     pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
 545                     pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
 546                     pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
 547                 }
 548                 else /* p0' */
 549                     pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 550                 if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
 551                 {
 552                     const int q3 = pix[3*xstride];
 553                     pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
 554                     pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
 555                     pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
 556                 }
 557                 else /* q0' */
 558                     pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 559             }
 560             else /* p0', q0' */
 561             {
 562                 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
 563                 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
 564             }
 565         }
 566         pix += ystride;
 567     }
 568 }
 569 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 570 {
 571     deblock_luma_intra_c( pix, stride, 1, alpha, beta );
 572 }
 573 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 574 {
 575     deblock_luma_intra_c( pix, 1, stride, alpha, beta );
 576 }
 577
 578 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
 579 {
 580     int d;
 581     for( d = 0; d < 8; d++ )
 582     {
 583         const int p1 = pix[-2*xstride];
 584         const int p0 = pix[-1*xstride];
 585         const int q0 = pix[ 0*xstride];
 586         const int q1 = pix[ 1*xstride];
 587
 588         if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
 589         {
 590             pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
 591             pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
 592         }
 593         pix += ystride;
 594     }
 595 }
 596 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 597 {
 598     deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
 599 }
 600 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
 601 {
 602     deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
 603 }
 604
 605 static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
 606 {
 607     const int index_a = i_qp + h->sh.i_alpha_c0_offset;
 608     const int alpha = alpha_table(index_a);
 609     const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
 610     int8_t tc[4];
 611
 612     if( !alpha || !beta )
 613         return;
 614
 615     tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
 616     tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
 617     tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
 618     tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
 619
 620     pf_inter( pix1, i_stride, alpha, beta, tc );
 621     if( b_chroma )
 622         pf_inter( pix2, i_stride, alpha, beta, tc );
 623 }
 624
 625 static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
 626 {
 627     const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
 628     const int beta  = beta_table(i_qp + h->sh.i_beta_offset);
 629
 630     if( !alpha || !beta )
 631         return;
 632
 633     pf_intra( pix1, i_stride, alpha, beta );
 634     if( b_chroma )
 635         pf_intra( pix2, i_stride, alpha, beta );
 636 }
 637
 638 void x264_frame_deblock_row( x264_t *h, int mb_y )
 639 {
 640     const int s8x8 = 2 * h->mb.i_mb_stride;
 641     const int s4x4 = 4 * h->mb.i_mb_stride;
 642     const int b_interlaced = h->sh.b_mbaff;
 643     const int mvy_limit = 4 >> b_interlaced;
 644     const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
 645     const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
 646     int mb_x;
 647     int stridey   = h->fdec->i_stride[0];
 648     int stride2y  = stridey << b_interlaced;
 649     int strideuv  = h->fdec->i_stride[1];
 650     int stride2uv = strideuv << b_interlaced;
 651
 652     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 653         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
 654
 655     for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
 656     {
 657         const int mb_xy  = mb_y * h->mb.i_mb_stride + mb_x;
 658         const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
 659         const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
 660         const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
 661         const int i_qp = h->mb.qp[mb_xy];
 662         int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
 663         uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
 664         uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
 665         uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
 666         if( b_interlaced && (mb_y&1) )
 667         {
 668             pixy -= 15*stridey;
 669             pixu -=  7*strideuv;
 670             pixv -=  7*strideuv;
 671         }
 672
 673         x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
 674
 675         if( i_qp <= qp_thresh )
 676             i_edge_end = 1;
 677
 678         #define FILTER_DIR(intra, i_dir)\
 679         {\
 680             /* Y plane */\
 681             i_qpn= h->mb.qp[mbn_xy];\
 682             if( i_dir == 0 )\
 683             {\
 684                 /* vertical edge */\
 685                 deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
 686                               stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
 687                               h->loopf.deblock_h_luma##intra );\
 688                 if( !(i_edge & 1) )\
 689                 {\
 690                     /* U/V planes */\
 691                     int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
 692                     deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
 693                                   stride2uv, bS, i_qpc, 1,\
 694                                   h->loopf.deblock_h_chroma##intra );\
 695                 }\
 696             }\
 697             else\
 698             {\
 699                 /* horizontal edge */\
 700                 deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
 701                               stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
 702                               h->loopf.deblock_v_luma##intra );\
 703                 /* U/V planes */\
 704                 if( !(i_edge & 1) )\
 705                 {\
 706                     int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
 707                     deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
 708                                   stride2uv, bS, i_qpc, 1,\
 709                                   h->loopf.deblock_v_chroma##intra );\
 710                 }\
 711             }\
 712         }
 713
 714         #define DEBLOCK_STRENGTH(i_dir)\
 715         {\
 716             /* *** Get bS for each 4px for the current edge *** */\
 717             if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
 718                 *(uint32_t*)bS = 0x03030303;\
 719             else\
 720             {\
 721                 *(uint32_t*)bS = 0x00000000;\
 722                 for( i = 0; i < 4; i++ )\
 723                 {\
 724                     int x  = i_dir == 0 ? i_edge : i;\
 725                     int y  = i_dir == 0 ? i      : i_edge;\
 726                     int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
 727                     int yn = i_dir == 0 ? y : (y - 1)&0x03;\
 728                     if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
 729                         h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
 730                         bS[i] = 2;\
 731                     else if(!(i_edge&no_sub8x8))\
 732                     {\
 733                         if((i&no_sub8x8) && bS[i-1] != 2)\
 734                             bS[i] = bS[i-1];\
 735                         else\
 736                         {\
 737                             /* FIXME: A given frame may occupy more than one position in\
 738                              * the reference list. So we should compare the frame numbers,\
 739                              * not the indices in the ref list.\
 740                              * No harm yet, as we don't generate that case.*/\
 741                             int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
 742                             int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
 743                             int i4p= mb_4x4+x+y*s4x4;\
 744                             int i4q= mbn_4x4+xn+yn*s4x4;\
 745                             if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
 746                                 abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
 747                                 abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
 748                                (h->sh.i_type == SLICE_TYPE_B &&\
 749                                (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
 750                                 abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
 751                                 abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
 752                             {\
 753                                 bS[i] = 1;\
 754                             }\
 755                         }\
 756                     }\
 757                 }\
 758             }\
 759         }
 760
 761         /* i_dir == 0 -> vertical edge
 762          * i_dir == 1 -> horizontal edge */
 763         #define DEBLOCK_DIR(i_dir)\
 764         {\
 765             int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
 766             int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
 767             DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
 768             if( i_edge )\
 769                 i_edge+= b_8x8_transform;\
 770             else\
 771             {\
 772                 mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
 773                 mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
 774                 mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
 775                 if( b_interlaced && i_dir == 1 )\
 776                 {\
 777                     mbn_xy -= h->mb.i_mb_stride;\
 778                     mbn_8x8 -= 2 * s8x8;\
 779                     mbn_4x4 -= 4 * s4x4;\
 780                 }\
 781                 else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
 782                 {\
 783                     FILTER_DIR( _intra, i_dir );\
 784                     goto end##i_dir;\
 785                 }\
 786                 DEBLOCK_STRENGTH(i_dir);\
 787                 if( *(uint32_t*)bS )\
 788                     FILTER_DIR( , i_dir);\
 789                 end##i_dir:\
 790                 i_edge += b_8x8_transform+1;\
 791             }\
 792             mbn_xy  = mb_xy;\
 793             mbn_8x8 = mb_8x8;\
 794             mbn_4x4 = mb_4x4;\
 795             for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
 796             {\
 797                 DEBLOCK_STRENGTH(i_dir);\
 798                 if( *(uint32_t*)bS )\
 799                     FILTER_DIR( , i_dir);\
 800             }\
 801         }
 802
 803         DEBLOCK_DIR(0);
 804         DEBLOCK_DIR(1);
 805     }
 806
 807     if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
 808         munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
 809 }
 810
 811 void x264_frame_deblock( x264_t *h )
 812 {
 813     int mb_y;
 814     for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
 815         x264_frame_deblock_row( h, mb_y );
 816 }
 817
 818 #ifdef HAVE_MMX
 819 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 820 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 821 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 822 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 823
 824 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 825 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 826 void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
 827 void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
 828 #ifdef ARCH_X86
 829 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 830 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 831 void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 832 void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
 833
 834 static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 835 {
 836     x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
 837     x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
 838 }
 839 static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
 840 {
 841     x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
 842     x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
 843 }
 844 #endif
 845 #endif
 846
 847 #ifdef ARCH_PPC
 848 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 849 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
 850 #endif // ARCH_PPC
 851
 852 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
 853 {
 854     pf->deblock_v_luma = deblock_v_luma_c;
 855     pf->deblock_h_luma = deblock_h_luma_c;
 856     pf->deblock_v_chroma = deblock_v_chroma_c;
 857     pf->deblock_h_chroma = deblock_h_chroma_c;
 858     pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
 859     pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
 860     pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
 861     pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
 862
 863 #ifdef HAVE_MMX
 864     if( cpu&X264_CPU_MMXEXT )
 865     {
 866         pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
 867         pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
 868         pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
 869         pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
 870 #ifdef ARCH_X86
 871         pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
 872         pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
 873         pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
 874         pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
 875 #endif
 876         if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
 877         {
 878             pf->deblock_v_luma = x264_deblock_v_luma_sse2;
 879             pf->deblock_h_luma = x264_deblock_h_luma_sse2;
 880             pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
 881             pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
 882         }
 883     }
 884 #endif
 885
 886 #ifdef ARCH_PPC
 887     if( cpu&X264_CPU_ALTIVEC )
 888     {
 889         pf->deblock_v_luma = x264_deblock_v_luma_altivec;
 890         pf->deblock_h_luma = x264_deblock_h_luma_altivec;
 891    }
 892 #endif // ARCH_PPC
 893 }
 894
 895
 896 /* threading */
 897 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
 898 {
 899     x264_pthread_mutex_lock( &frame->mutex );
 900     frame->i_lines_completed = i_lines_completed;
 901     x264_pthread_cond_broadcast( &frame->cv );
 902     x264_pthread_mutex_unlock( &frame->mutex );
 903 }
 904
 905 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
 906 {
 907     x264_pthread_mutex_lock( &frame->mutex );
 908     while( frame->i_lines_completed < i_lines_completed )
 909         x264_pthread_cond_wait( &frame->cv, &frame->mutex );
 910     x264_pthread_mutex_unlock( &frame->mutex );
 911 }
 912
 913 /* list operators */
 914
 915 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
 916 {
 917     int i = 0;
 918     while( list[i] ) i++;
 919     list[i] = frame;
 920 }
 921
 922 x264_frame_t *x264_frame_pop( x264_frame_t **list )
 923 {
 924     x264_frame_t *frame;
 925     int i = 0;
 926     assert( list[0] );
 927     while( list[i+1] ) i++;
 928     frame = list[i];
 929     list[i] = NULL;
 930     return frame;
 931 }
 932
 933 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
 934 {
 935     int i = 0;
 936     while( list[i] ) i++;
 937     while( i-- )
 938         list[i+1] = list[i];
 939     list[0] = frame;
 940 }
 941
 942 x264_frame_t *x264_frame_shift( x264_frame_t **list )
 943 {
 944     x264_frame_t *frame = list[0];
 945     int i;
 946     for( i = 0; list[i]; i++ )
 947         list[i] = list[i+1];
 948     assert(frame);
 949     return frame;
 950 }
 951
 952 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
 953 {
 954     assert( frame->i_reference_count > 0 );
 955     frame->i_reference_count--;
 956     if( frame->i_reference_count == 0 )
 957         x264_frame_push( h->frames.unused, frame );
 958     assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
 959 }
 960
 961 x264_frame_t *x264_frame_pop_unused( x264_t *h )
 962 {
 963     x264_frame_t *frame;
 964     if( h->frames.unused[0] )
 965         frame = x264_frame_pop( h->frames.unused );
 966     else
 967         frame = x264_frame_new( h );
 968     if( !frame )
 969         return NULL;
 970     frame->i_reference_count = 1;
 971     frame->b_intra_calculated = 0;
 972     return frame;
 973 }
 974
 975 void x264_frame_sort( x264_frame_t **list, int b_dts )
 976 {
 977     int i, b_ok;
 978     do {
 979         b_ok = 1;
 980         for( i = 0; list[i+1]; i++ )
 981         {
 982             int dtype = list[i]->i_type - list[i+1]->i_type;
 983             int dtime = list[i]->i_frame - list[i+1]->i_frame;
 984             int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
 985                              : dtime > 0;
 986             if( swap )
 987             {
 988                 XCHG( x264_frame_t*, list[i], list[i+1] );
 989                 b_ok = 0;
 990             }
 991         }
 992     } while( !b_ok );
 993 }