1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Allocate and initialise a new frame sized for encoder `h`.
 *
 * Returns the new frame, or NULL when the initial x264_malloc() fails.
 * The frame struct is zero-filled before any buffer is attached, so
 * every pointer that is never allocated stays NULL.
 *
 * NOTE(review): this listing is missing short lines (braces, local
 * declarations, a few assignments); the comments below describe only
 * the statements that are visible. */
33 x264_frame_t *x264_frame_new( x264_t *h )
35 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
38 int i_mb_count = h->mb.i_mb_count;
/* vertical padding is doubled when b_interlaced is 1 */
41 int i_padv = PADV << h->param.b_interlaced;
43 if( !frame ) return NULL;
45 memset( frame, 0, sizeof(x264_frame_t) );
47 /* allocate frame data (+64 for extra data for me) */
/* stride: width rounded up to a multiple of 16, plus PADH on each side */
48 i_stride = ( ( h->param.i_width + 15 ) & -16 )+ 2*PADH;
/* lines: height rounded up to a multiple of 16 (32 when interlaced) */
49 i_lines = ( ( h->param.i_height + 15 ) & -16 );
50 if( h->param.b_interlaced )
51 i_lines = ( i_lines + 31 ) & -32;
/* Planes 0..2: each plane's stride and line count are the base values
 * divided by i_divw / i_divh.  The divisors depend on the colourspace
 * tests below; their assignments are not visible in this listing. */
54 for( i = 0; i < 3; i++ )
60 if( h->param.i_csp == X264_CSP_I420 )
62 else if( h->param.i_csp == X264_CSP_I422 )
65 frame->i_stride[i] = i_stride / i_divw;
66 frame->i_lines[i] = i_lines / i_divh;
/* buffer holds the plane plus vertical padding above and below */
67 CHECKED_MALLOC( frame->buffer[i],
68 frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
/* plane[] points past the top padding rows and the left padding */
70 frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
71 frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
/* slot 3 is unused */
73 frame->i_stride[3] = 0;
74 frame->i_lines[3] = 0;
75 frame->buffer[3] = NULL;
76 frame->plane[3] = NULL;
/* filtered[0] aliases the luma plane; filtered[1..3] get their own
 * luma-sized padded buffers in buffer[4..6] */
78 frame->filtered[0] = frame->plane[0];
79 for( i = 0; i < 3; i++ )
81 CHECKED_MALLOC( frame->buffer[4+i],
82 frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
83 frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
84 frame->i_stride[0] * i_padv + PADH;
/* optional half-size planes: four buffers, half the luma stride (plus
 * PADH) and half the luma lines */
87 if( h->frames.b_have_lowres )
89 frame->i_stride_lowres = frame->i_stride[0]/2 + PADH;
90 frame->i_lines_lowres = frame->i_lines[0]/2;
91 for( i = 0; i < 4; i++ )
93 CHECKED_MALLOC( frame->buffer_lowres[i],
94 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
95 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
96 frame->i_stride_lowres * i_padv + PADH;
/* X264_ME_ESA only: a uint16_t `integral` buffer in buffer[7], sized
 * for two padded luma planes */
100 if( h->param.analyse.i_me_method == X264_ME_ESA )
102 CHECKED_MALLOC( frame->buffer[7],
103 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
104 frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
/* default metadata */
108 frame->i_type = X264_TYPE_AUTO;
109 frame->i_qpplus1 = 0;
112 frame->i_frame_num = -1;
113 frame->i_lines_completed = -1;
/* per-macroblock arrays: one int8_t type, 16 int16_t mv pairs and four
 * int8_t refs per macroblock; list 1 is allocated only when
 * h->param.i_bframe is non-zero */
115 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
116 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
117 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
118 if( h->param.i_bframe )
120 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
121 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
126 frame->ref[1] = NULL;
/* per-row arrays: one int per 16 lines; i_row_satds is allocated for
 * every (i, j) with i, j < i_bframe + 2 */
129 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
130 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
131 for( i = 0; i < h->param.i_bframe + 2; i++ )
132 for( j = 0; j < h->param.i_bframe + 2; j++ )
133 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
135 pthread_mutex_init( &frame->mutex, NULL );
136 pthread_cond_init( &frame->cv, NULL );
/* NOTE(review): presumably the failure path that CHECKED_MALLOC jumps
 * to; the label and return statements are not visible -- confirm. */
141 x264_frame_delete( frame );
/* Release everything attached to `frame`: the eight plane/filtered/
 * integral buffers, the four lowres buffers, the full
 * (X264_BFRAME_MAX+2)^2 grid of row-SATD arrays, the per-row and
 * per-macroblock arrays, and finally the mutex and condition variable.
 *
 * NOTE(review): no statement freeing the frame struct itself is visible
 * in this listing -- confirm against the full file. */
145 void x264_frame_delete( x264_frame_t *frame )
148 for( i = 0; i < 8; i++ )
149 x264_free( frame->buffer[i] );
150 for( i = 0; i < 4; i++ )
151 x264_free( frame->buffer_lowres[i] );
/* frees the whole grid, not only the i_bframe+2 entries that
 * x264_frame_new allocates */
152 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
153 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
154 x264_free( frame->i_row_satds[i][j] );
155 x264_free( frame->i_row_bits );
156 x264_free( frame->i_row_qp );
157 x264_free( frame->mb_type );
158 x264_free( frame->mv[0] );
159 x264_free( frame->mv[1] );
160 x264_free( frame->ref[0] );
161 x264_free( frame->ref[1] );
162 pthread_mutex_destroy( &frame->mutex );
163 pthread_cond_destroy( &frame->cv );
/* Copy an input picture `src` into the encoder frame `dst`.
 *
 * Copies the frame type, qp+1 and pts, then dispatches on the picture's
 * colourspace (masked with X264_CSP_MASK) to h->csp.convert[] to copy
 * the pixel data at the configured width and height.  A colourspace
 * outside (X264_CSP_NONE, X264_CSP_MAX) is reported through x264_log().
 *
 * NOTE(review): presumably the convert call is the `else` branch of the
 * range check; the `else` line is not visible in this listing. */
167 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
169 int i_csp = src->img.i_csp & X264_CSP_MASK;
170 dst->i_type = src->i_type;
171 dst->i_qpplus1 = src->i_qpplus1;
172 dst->i_pts = src->i_pts;
174 if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
175 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
177 h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
/* Replicate the edge pixels of a plane into its surrounding padding.
 *
 * pix       top-left pixel of the area to pad
 * i_stride  distance in bytes between consecutive rows
 * i_width   width of the area in pixels
 * i_height  number of rows in the area
 * i_padh    bytes of padding written to the left and right of each row
 * i_padv    rows of padding written above and below
 *
 * NOTE(review): b_pad_top / b_pad_bottom are not used by any visible
 * line; presumably they gate the upper and lower loops on lines missing
 * from this listing -- confirm. */
182 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
/* address of pixel (x, y) relative to pix */
184 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
186 for( y = 0; y < i_height; y++ )
/* left padding: repeat the first pixel of the row */
189 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right padding: repeat the last pixel of the row */
191 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* upper padding: copy the already-padded first row upwards */
195 for( y = 0; y < i_padv; y++ )
196 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* lower padding: copy the already-padded last row downwards */
199 for( y = 0; y < i_padv; y++ )
200 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Expand the borders of every plane of `frame` for the macroblock row
 * `mb_y`; `b_end` selects the variant that covers all remaining rows
 * down to the bottom of the picture.
 *
 * NOTE(review): the definition of b_start, the statement controlled by
 * the first `if`, and the condition choosing between the two-call and
 * single-call forms at the end are not visible in this listing. */
204 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
208 if( mb_y & h->sh.b_mbaff )
210 for( i = 0; i < frame->i_plane; i++ )
/* `>> !!i` halves every dimension for planes other than plane 0 */
212 int stride = frame->i_stride[i];
213 int width = 16*h->sps->i_mb_width >> !!i;
214 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
215 int padh = PADH >> !!i;
216 int padv = PADV >> !!i;
217 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
/* start 4 luma rows above the macroblock row, clamped at the plane top */
218 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
219 if( b_end && !b_start )
220 height += 4 >> (!!i + h->sh.b_mbaff);
/* two calls with doubled stride: even rows, then odd rows (pix+stride) */
223 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
224 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single call covering all rows */
228 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of the three filtered planes (frame->filtered[1..3])
 * for macroblock row `mb_y`, using the luma stride.  The padded area is
 * 16 pixels wider than the picture and, when b_end is set, 16 rows
 * taller than the remaining rows.
 *
 * NOTE(review): the definitions of b_start, padh and padv and the
 * condition choosing between the two-call and single-call forms are not
 * visible in this listing. */
233 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
235 /* during filtering, 8 extra pixels were filtered on each edge.
236 we want to expand border from the last filtered pixel */
238 int stride = frame->i_stride[0];
239 int width = 16*h->sps->i_mb_width + 16;
240 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
244 for( i = 1; i < 4; i++ )
246 // buffer: 8 luma, to match the hpel filter
/* start 8 pixels left of and 8 rows (16 with mbaff) above the row */
247 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
/* two calls with doubled stride: even rows, then odd rows (pix+stride) */
250 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
251 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single call covering all rows */
255 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of all four lowres planes of `frame`.  The width
 * passed is the lowres stride minus 2*PADH; padding is PADH by PADV and
 * both the top and bottom flags are set. */
260 void x264_frame_expand_border_lowres( x264_frame_t *frame )
263 for( i = 0; i < 4; i++ )
264 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad each plane of `frame` from the configured picture size
 * (h->param.i_width x i_height) out to the macroblock-aligned size
 * (16 * i_mb_width x 16 * i_mb_height).  Dimensions are halved for every
 * plane other than plane 0.
 *
 * The first loop repeats the last real pixel of each row to the right;
 * the second copies the last real row into the rows below it.
 *
 * NOTE(review): the length arguments of the memset/memcpy calls and any
 * guards on i_padx / i_pady are not visible in this listing. */
267 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
270 for( i = 0; i < frame->i_plane; i++ )
272 int i_subsample = i ? 1 : 0;
273 int i_width = h->param.i_width >> i_subsample;
274 int i_height = h->param.i_height >> i_subsample;
275 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
276 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
280 for( y = 0; y < i_height; y++ )
281 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
282 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
287 //FIXME interlace? or just let it pad using the wrong field
288 for( y = i_height; y < i_height + i_pady; y++ )
289 memcpy( &frame->plane[i][y*frame->i_stride[i]],
290 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
297 /* Deblocking filter */
/* Deblocking alpha threshold, looked up in deblock_edge() with an index
 * clipped to 0..51.
 * NOTE(review): the array is declared with 52 entries but only 50
 * initialisers are visible; the last row appears to be missing from
 * this listing. */
299 static const int i_alpha_table[52] =
301 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
302 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
303 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
304 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
305 80, 90,101,113,127,144,162,182,203,226,
/* Deblocking beta threshold, looked up in deblock_edge() with an index
 * clipped to 0..51.
 * NOTE(review): the array is declared with 52 entries but only 50
 * initialisers are visible; the last row appears to be missing from
 * this listing. */
308 static const int i_beta_table[52] =
310 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
312 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
313 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
314 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* Deblocking tc0 clipping values.  deblock_edge() indexes the first
 * dimension with the clipped alpha index (0..51) and the second with
 * bS - 1, i.e. one column for each boundary strength 1, 2 and 3. */
317 static const int i_tc0_table[52][3] =
319 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
320 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
321 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
322 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
323 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
324 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
325 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
326 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
327 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* Used by the deblock filters below on the adjusted p0/q0 values before
 * they are stored back into uint8_t pixels.
 * NOTE(review): the body is not visible in this listing; presumably it
 * clamps `a` to 0..255 -- confirm against the full file. */
331 static inline int clip_uint8( int a )
/* Normal (non-intra) luma edge filter, C version.
 *
 * pix      first pixel on the q side of the edge
 * xstride  step across the edge (p pixels at negative multiples)
 * ystride  step along the edge; no visible line uses it
 * alpha, beta  activity thresholds
 * tc0      four clipping limits, one per group of four pixels
 *
 * A pixel position is filtered only when |p0-q0| < alpha, |p1-p0| < beta
 * and |q1-q0| < beta.  p1 and q1 are adjusted (limited to +/- tc0[i])
 * when |p2-p0| < beta and |q2-q0| < beta respectively; p0 and q0 are
 * moved by a delta limited to +/- tc.
 *
 * NOTE(review): the definition and updates of `tc`, any early-out on
 * tc0[i], and the pointer advance along the edge are not visible in
 * this listing. */
339 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
342 for( i = 0; i < 4; i++ ) {
347 for( d = 0; d < 4; d++ ) {
348 const int p2 = pix[-3*xstride];
349 const int p1 = pix[-2*xstride];
350 const int p0 = pix[-1*xstride];
351 const int q0 = pix[ 0*xstride];
352 const int q1 = pix[ 1*xstride];
353 const int q2 = pix[ 2*xstride];
355 if( abs( p0 - q0 ) < alpha &&
356 abs( p1 - p0 ) < beta &&
357 abs( q1 - q0 ) < beta ) {
362 if( abs( p2 - p0 ) < beta ) {
363 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
366 if( abs( q2 - q0 ) < beta ) {
367 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
371 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
372 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
373 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Luma filter with xstride = stride and ystride = 1: p/q samples are
 * taken from rows above and below `pix`. */
379 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
381 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Luma filter with xstride = 1 and ystride = stride: p/q samples are
 * taken from columns left and right of `pix`. */
383 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
385 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Normal (non-intra) chroma edge filter, C version.
 *
 * Handles four groups of two pixel positions, each group with its own
 * limit tc = tc0[i].  When |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta, p0 and q0 are moved towards each other by a delta
 * limited to +/- tc; p1 and q1 are only read.
 *
 * NOTE(review): any early-out on tc and the pointer advance along the
 * edge (ystride) are not visible in this listing. */
388 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
391 for( i = 0; i < 4; i++ ) {
392 const int tc = tc0[i];
397 for( d = 0; d < 2; d++ ) {
398 const int p1 = pix[-2*xstride];
399 const int p0 = pix[-1*xstride];
400 const int q0 = pix[ 0*xstride];
401 const int q1 = pix[ 1*xstride];
403 if( abs( p0 - q0 ) < alpha &&
404 abs( p1 - p0 ) < beta &&
405 abs( q1 - q0 ) < beta ) {
407 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
408 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
409 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Chroma filter with xstride = stride and ystride = 1: p/q samples are
 * taken from rows above and below `pix`. */
415 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
417 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* Chroma filter with xstride = 1 and ystride = stride: p/q samples are
 * taken from columns left and right of `pix`. */
419 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
421 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Intra (strong) luma edge filter, C version, over 16 pixel positions.
 *
 * A position is filtered when |p0-q0| < alpha, |p1-p0| < beta and
 * |q1-q0| < beta.  If additionally |p0-q0| < (alpha >> 2) + 2, each side
 * is handled separately: when |p2-p0| < beta (resp. |q2-q0| < beta)
 * three pixels on that side are rewritten, using p3 (resp. q3) as well.
 * Two further forms write only p0 / q0 with a 3-tap average.
 *
 * NOTE(review): the `else` lines that select the 3-tap forms and the
 * pointer advance along the edge (ystride) are not visible in this
 * listing. */
424 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
427 for( d = 0; d < 16; d++ ) {
428 const int p2 = pix[-3*xstride];
429 const int p1 = pix[-2*xstride];
430 const int p0 = pix[-1*xstride];
431 const int q0 = pix[ 0*xstride];
432 const int q1 = pix[ 1*xstride];
433 const int q2 = pix[ 2*xstride];
435 if( abs( p0 - q0 ) < alpha &&
436 abs( p1 - p0 ) < beta &&
437 abs( q1 - q0 ) < beta ) {
439 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
/* p side: rewrite p0, p1 and p2 */
440 if( abs( p2 - p0 ) < beta)
442 const int p3 = pix[-4*xstride];
444 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
445 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
446 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side: rewrite p0 only */
449 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: rewrite q0, q1 and q2 */
451 if( abs( q2 - q0 ) < beta)
453 const int q3 = pix[3*xstride];
455 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
456 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
457 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side: rewrite q0 only */
460 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* rewrite p0 and q0 only */
464 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
465 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Intra luma filter with xstride = stride and ystride = 1: p/q samples
 * are taken from rows above and below `pix`. */
471 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
473 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* Intra luma filter with xstride = 1 and ystride = stride: p/q samples
 * are taken from columns left and right of `pix`. */
475 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
477 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Intra (strong) chroma edge filter, C version, over 8 pixel positions.
 *
 * When |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta, p0 and q0 are
 * each replaced by a rounded 3-tap average; p1 and q1 are only read.
 *
 * NOTE(review): the pointer advance along the edge (ystride) is not
 * visible in this listing. */
480 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
483 for( d = 0; d < 8; d++ ) {
484 const int p1 = pix[-2*xstride];
485 const int p0 = pix[-1*xstride];
486 const int q0 = pix[ 0*xstride];
487 const int q1 = pix[ 1*xstride];
489 if( abs( p0 - q0 ) < alpha &&
490 abs( p1 - p0 ) < beta &&
491 abs( q1 - q0 ) < beta ) {
493 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
494 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Intra chroma filter with xstride = stride and ystride = 1: p/q samples
 * are taken from rows above and below `pix`. */
500 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
502 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* Intra chroma filter with xstride = 1 and ystride = stride: p/q samples
 * are taken from columns left and right of `pix`. */
504 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
506 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one edge at `pix` with the given boundary strengths.
 *
 * bS        four strengths, one per group of pixels along the edge
 * i_qp      QP used to look up the thresholds
 * b_chroma  added to every tc value
 * pf_inter  filter taking a tc array
 * pf_intra  filter taking no tc array
 *
 * alpha and beta come from the tables, indexed by i_qp plus the slice's
 * alpha/beta offsets clipped to 0..51.  For pf_inter, tc[i] is the
 * i_tc0_table entry for bS[i] (or -1 when bS[i] is 0) plus b_chroma.
 *
 * NOTE(review): the condition that selects pf_inter versus pf_intra is
 * not visible in this listing. */
509 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
510 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
513 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
514 const int alpha = i_alpha_table[index_a];
515 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
520 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
521 pf_inter( pix, i_stride, alpha, beta, tc );
523 pf_intra( pix, i_stride, alpha, beta );
/* Deblock the macroblocks of row `mb_y` in the reconstructed frame
 * h->fdec.
 *
 * For every macroblock and both edge directions, a boundary strength
 * bS[4] is derived for each edge and the luma and both chroma planes
 * are filtered through deblock_edge() with the function pointers in
 * h->loopf.
 *
 * NOTE(review): this listing is missing many short lines (braces,
 * `else`, `continue`, the bS assignments in the non-intra path, the
 * conditions guarding the chroma calls, the mb_x increment); the
 * comments below describe only the visible statements. */
527 void x264_frame_deblock_row( x264_t *h, int mb_y )
/* row strides of the 8x8 and 4x4 block grids */
529 const int s8x8 = 2 * h->mb.i_mb_stride;
530 const int s4x4 = 4 * h->mb.i_mb_stride;
531 const int b_interlaced = h->sh.b_mbaff;
/* vertical mv difference threshold: 4, or 2 when interlaced */
532 const int mvy_limit = 4 >> b_interlaced;
/* plane strides, doubled when interlaced */
535 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
536 h->fdec->i_stride[1] << b_interlaced,
537 h->fdec->i_stride[2] << b_interlaced };
539 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
541 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
542 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
543 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
544 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP macroblocks only get edge 0 considered */
545 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
/* offset of the macroblock's top-left pixel in each plane */
548 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
549 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
550 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* interlaced, odd row: move up 15 luma / 7 chroma rows */
551 if( b_interlaced && (mb_y&1) )
553 i_pix_y[0] -= 15*h->fdec->i_stride[0];
554 i_pix_y[1] -= 7*h->fdec->i_stride[1];
555 i_pix_y[2] -= 7*h->fdec->i_stride[2];
558 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
560 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
561 * entropy coding, but per 64 coeffs for the purpose of deblocking */
562 if( !h->param.b_cabac && b_8x8_transform )
/* each 32-bit word covers four nnz bytes; any non-zero word is
 * rewritten so that all four bytes are 1 */
564 uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy];
565 if( nnz[0] ) nnz[0] = 0x01010101;
566 if( nnz[1] ) nnz[1] = 0x01010101;
567 if( nnz[2] ) nnz[2] = 0x01010101;
568 if( nnz[3] ) nnz[3] = 0x01010101;
571 /* i_dir == 0 -> vertical edge
572 * i_dir == 1 -> horizontal edge */
573 for( i_dir = 0; i_dir < 2; i_dir++ )
/* start at edge 1 (skip edge 0) when mb_x == 0 for i_dir 0, or when
 * mb_y <= b_interlaced for i_dir 1 */
575 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
578 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
580 int mbn_xy, mbn_8x8, mbn_4x4;
581 int bS[4]; /* filtering strength */
583 if( b_8x8_transform && (i_edge&1) )
/* neighbour indices: the current macroblock for inner edges, the
 * left (i_dir 0) or upper (i_dir 1) macroblock for edge 0 */
586 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
587 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
588 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
/* interlaced top edge: step one more macroblock row up */
590 if( b_interlaced && i_edge == 0 && i_dir == 1 )
592 mbn_xy -= h->mb.i_mb_stride;
597 /* *** Get bS for each 4px for the current edge *** */
/* either side intra: bS is 4 on edge 0 (except the interlaced
 * i_dir 1 case), otherwise 3 */
598 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
600 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
605 for( i = 0; i < 4; i++ )
/* (x, y): 4x4 block on this side of the edge; (xn, yn): the block
 * across it, wrapped into 0..3 */
607 int x = i_dir == 0 ? i_edge : i;
608 int y = i_dir == 0 ? i : i_edge;
609 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
610 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
/* test: either block has non-zero coefficients */
612 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
613 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
619 /* FIXME: A given frame may occupy more than one position in
620 * the reference list. So we should compare the frame numbers,
621 * not the indices in the ref list.
622 * No harm yet, as we don't generate that case.*/
624 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
625 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
626 int i4p= mb_4x4+x+y*s4x4;
627 int i4q= mbn_4x4+xn+yn*s4x4;
/* one list, or two for B slices: test for a different ref index, or
 * an mv difference of >= 4 horizontally / >= mvy_limit vertically */
632 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
634 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
635 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
636 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
/* QPs of both sides; the filters use their rounded average */
648 i_qp = h->mb.qp[mb_xy];
649 i_qpn= h->mb.qp[mbn_xy];
/* filter calls using the deblock_h_* pointers: luma edge at column
 * 4*i_edge */
654 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
655 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
656 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
/* chroma QP: rounded average of both sides' i_chroma_qp_table values;
 * chroma edge at column 2*i_edge */
660 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
661 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
662 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
663 i_stride2[1], bS, i_qpc, 1,
664 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
665 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
666 i_stride2[2], bS, i_qpc, 1,
667 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
672 /* horizontal edge */
/* filter calls using the deblock_v_* pointers: luma edge at row
 * 4*i_edge, chroma edges at row 2*i_edge */
673 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
674 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
675 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
679 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
680 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
681 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
682 i_stride2[1], bS, i_qpc, 1,
683 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
684 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
685 i_stride2[2], bS, i_qpc, 1,
686 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
/* end of macroblock: when interlaced, mb_y alternates between the two
 * rows of a pair.  NOTE(review): presumably mb_x advances under the
 * `if` below; that statement is not visible -- confirm. */
693 if( !b_interlaced || (mb_y&1) )
695 mb_y ^= b_interlaced;
/* Deblock a whole frame by calling x264_frame_deblock_row() for each
 * macroblock row; rows advance two at a time when h->sh.b_mbaff is 1. */
699 void x264_frame_deblock( x264_t *h )
702 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
703 x264_frame_deblock_row( h, mb_y );
/* Prototypes of deblocking routines with no definition in this listing;
 * x264_deblock_init() installs them when the matching X264_CPU_MMXEXT /
 * X264_CPU_SSE2 flag is set.
 * NOTE(review): any preprocessor guards around these declarations are
 * not visible here. */
707 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
708 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
709 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
710 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
713 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
714 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
716 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
717 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Luma filter built from two calls to the v8 routine: the first at
 * `pix` with tc0[0..1], the second 8 pixels further with tc0[2..3]. */
719 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
721 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
722 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* Fill the deblocking function table `pf`.
 *
 * All eight entries first get the C implementations.  With
 * X264_CPU_MMXEXT set in `cpu`, the four chroma entries are replaced by
 * the mmxext routines; the luma entries are replaced by the sse2 or
 * mmxext routines.
 *
 * NOTE(review): the preprocessor guards and the nesting/else structure
 * around the SSE2 and mmxext luma assignments are not visible in this
 * listing. */
727 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
729 pf->deblock_v_luma = deblock_v_luma_c;
730 pf->deblock_h_luma = deblock_h_luma_c;
731 pf->deblock_v_chroma = deblock_v_chroma_c;
732 pf->deblock_h_chroma = deblock_h_chroma_c;
733 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
734 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
735 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
736 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
739 if( cpu&X264_CPU_MMXEXT )
741 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
742 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
743 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
744 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
747 if( cpu&X264_CPU_SSE2 )
749 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
750 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
753 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
754 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
/* Record progress on `frame` and wake every waiter: with the frame's
 * mutex held, store i_lines_completed and broadcast on the frame's
 * condition variable. */
764 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
766 pthread_mutex_lock( &frame->mutex );
767 frame->i_lines_completed = i_lines_completed;
768 pthread_cond_broadcast( &frame->cv );
769 pthread_mutex_unlock( &frame->mutex );
/* Block until frame->i_lines_completed is at least i_lines_completed.
 * The check runs in a loop under the frame's mutex, so a wakeup that
 * arrives before enough lines are done simply waits again. */
772 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
774 pthread_mutex_lock( &frame->mutex );
775 while( frame->i_lines_completed < i_lines_completed )
776 pthread_cond_wait( &frame->cv, &frame->mutex );
777 pthread_mutex_unlock( &frame->mutex );
/* Second definitions of the two functions above, with no visible body.
 * NOTE(review): presumably the alternative used when the pthread
 * versions are compiled out; the preprocessor lines and the bodies are
 * not visible in this listing -- confirm. */
781 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
783 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
/* The visible loop scans the NULL-terminated `list` to its first empty
 * slot.
 * NOTE(review): the statement that stores `frame` there is not visible
 * in this listing. */
790 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
793 while( list[i] ) i++;
/* The visible loop advances `i` to the last non-NULL entry of `list`.
 * It reads list[i+1] first, so the list must hold at least one frame.
 * NOTE(review): the statements that take that entry out and return it
 * are not visible in this listing. */
797 x264_frame_t *x264_frame_pop( x264_frame_t **list )
802 while( list[i+1] ) i++;
/* The visible loop counts the entries of the NULL-terminated `list`.
 * NOTE(review): presumably the entries are then shifted up by one and
 * `frame` is stored at list[0]; those statements are not visible in
 * this listing -- confirm. */
808 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
811 while( list[i] ) i++;
/* Takes list[0] (NULL when the list is empty) and then walks the
 * remaining entries up to the NULL terminator.
 * NOTE(review): the loop body (presumably moving each entry down one
 * slot) and the return statement are not visible in this listing. */
817 x264_frame_t *x264_frame_shift( x264_frame_t **list )
819 x264_frame_t *frame = list[0];
821 for( i = 0; list[i]; i++ )
/* Drop one reference to `frame`; when the count reaches zero the frame
 * is pushed onto h->frames.unused for reuse.
 *
 * The first assert requires a positive reference count on entry.  The
 * last assert checks that the final slot of h->frames.unused is still
 * NULL, i.e. the push did not fill the array completely. */
827 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
829 assert( frame->i_reference_count > 0 );
830 frame->i_reference_count--;
831 if( frame->i_reference_count == 0 )
832 x264_frame_push( h->frames.unused, frame );
833 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Obtain a frame with its reference count set to 1: one popped from
 * h->frames.unused when that list is non-empty, or a new one from
 * x264_frame_new().
 *
 * NOTE(review): x264_frame_new() can return NULL, and `frame` is
 * dereferenced here with no visible NULL check.  The `else` and
 * `return` lines are not visible in this listing -- confirm. */
836 x264_frame_t *x264_frame_pop_unused( x264_t *h )
839 if( h->frames.unused[0] )
840 frame = x264_frame_pop( h->frames.unused );
842 frame = x264_frame_new( h );
843 assert( frame->i_reference_count == 0 );
844 frame->i_reference_count = 1;
/* Sort the NULL-terminated `list` by comparing adjacent entries and
 * exchanging them with XCHG.
 *
 * dtype and dtime are the differences in i_type and i_frame between
 * neighbours.  With b_dts set, a pair is swapped when dtype > 0, or
 * when dtype == 0 and dtime > 0.
 *
 * NOTE(review): the listing stops mid-function.  The other arm of the
 * ternary, the test on `swap` and the outer repeat loop are not
 * visible.  The loop reads list[i+1] at i = 0, so presumably the list
 * must hold at least one frame -- confirm. */
848 void x264_frame_sort( x264_frame_t **list, int b_dts )
853 for( i = 0; list[i+1]; i++ )
855 int dtype = list[i]->i_type - list[i+1]->i_type;
856 int dtime = list[i]->i_frame - list[i+1]->i_frame;
857 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
861 XCHG( x264_frame_t*, list[i], list[i+1] );