1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Allocate a frame for encoder `h` together with its pixel planes, filtered
 * planes, optional lowres and integral planes, and the per-macroblock and
 * per-row side arrays.
 *
 * Returns NULL when the frame struct itself cannot be allocated.  The struct
 * is zeroed right after allocation, so every pointer that has not been
 * allocated yet is NULL.
 *
 * NOTE(review): CHECKED_MALLOC is defined outside this view; it presumably
 * jumps to the failure path at the end of the function, which hands the
 * partially built frame to x264_frame_delete(). */
29 x264_frame_t *x264_frame_new( x264_t *h )
31 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
34 int i_mb_count = h->mb.i_mb_count;
35 int i_stride, i_width, i_lines;
/* vertical padding is PADV, shifted left by b_interlaced (doubled when it is 1) */
36 int i_padv = PADV << h->param.b_interlaced;
38 if( !frame ) return NULL;
40 memset( frame, 0, sizeof(x264_frame_t) );
42 /* allocate frame data (+64 for extra data for me) */
/* width and height are rounded up to a multiple of 16; the stride adds PADH
 * on both sides; interlaced content rounds the height up to a multiple of 32 */
43 i_width = ( ( h->param.i_width + 15 ) & -16 );
44 i_stride = i_width + 2*PADH;
45 i_lines = ( ( h->param.i_height + 15 ) & -16 );
46 if( h->param.b_interlaced )
47 i_lines = ( i_lines + 31 ) & -32;
/* one buffer per plane; i_divw / i_divh are the per-plane width / height
 * divisors, set in the colourspace branches whose bodies are not visible here */
50 for( i = 0; i < 3; i++ )
56 if( h->param.i_csp == X264_CSP_I420 )
58 else if( h->param.i_csp == X264_CSP_I422 )
61 frame->i_stride[i] = i_stride / i_divw;
62 frame->i_width[i] = i_width / i_divw;
63 frame->i_lines[i] = i_lines / i_divh;
/* buffer holds the plane plus the vertical padding above and below it */
64 CHECKED_MALLOC( frame->buffer[i],
65 frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
/* plane[] points past the top padding rows and the left padding columns */
67 frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
68 frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
/* filtered[0] aliases the luma plane; filtered[1..3] get their own
 * luma-sized padded buffers in buffer[4..6] */
71 frame->filtered[0] = frame->plane[0];
72 for( i = 0; i < 3; i++ )
74 CHECKED_MALLOC( frame->buffer[4+i],
75 frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
76 frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
77 frame->i_stride[0] * i_padv + PADH;
/* four half-width, half-height planes, each with full PADH / i_padv padding */
80 if( h->frames.b_have_lowres )
82 frame->i_width_lowres = frame->i_width[0]/2;
83 frame->i_stride_lowres = frame->i_width_lowres + 2*PADH;
84 frame->i_lines_lowres = frame->i_lines[0]/2;
85 for( i = 0; i < 4; i++ )
87 CHECKED_MALLOC( frame->buffer_lowres[i],
88 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
89 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
90 frame->i_stride_lowres * i_padv + PADH;
/* ESA motion search only: uint16_t buffer of twice the padded luma size,
 * kept in buffer[7] */
94 if( h->param.analyse.i_me_method == X264_ME_ESA )
96 CHECKED_MALLOC( frame->buffer[7],
97 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
98 frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
/* initial per-frame state */
102 frame->i_type = X264_TYPE_AUTO;
103 frame->i_qpplus1 = 0;
106 frame->i_frame_num = -1;
107 frame->i_lines_completed = -1;
/* per-macroblock arrays: one type, 16 two-component motion vectors and
 * 4 reference indices per macroblock */
109 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
110 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
111 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
/* the second mv / ref set is only allocated when B-frames are enabled */
112 if( h->param.i_bframe )
114 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
115 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
120 frame->ref[1] = NULL;
/* per-row arrays: one int for every 16 lines */
123 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
124 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
125 for( i = 0; i < h->param.i_bframe + 2; i++ )
126 for( j = 0; j < h->param.i_bframe + 2; j++ )
127 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
129 x264_pthread_mutex_init( &frame->mutex, NULL );
130 x264_pthread_cond_init( &frame->cv, NULL );
/* failure path.
 * NOTE(review): x264_frame_delete() also destroys frame->mutex and frame->cv;
 * if an allocation above fails they have not been initialised yet - confirm
 * that the x264_pthread wrappers tolerate that. */
135 x264_frame_delete( frame );
/* Release the buffers owned by `frame` and destroy its mutex and condition
 * variable.
 *
 * x264_free() is called on every slot of every array, allocated or not
 * (all 8 buffer slots, all 4 lowres slots, the full X264_BFRAME_MAX+2 square
 * of row SATD arrays), so unallocated slots must be NULL.
 * NOTE(review): x264_free() presumably accepts NULL; whether the frame struct
 * itself is released is not visible in this view. */
139 void x264_frame_delete( x264_frame_t *frame )
142 for( i = 0; i < 8; i++ )
143 x264_free( frame->buffer[i] );
144 for( i = 0; i < 4; i++ )
145 x264_free( frame->buffer_lowres[i] );
146 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
147 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
148 x264_free( frame->i_row_satds[i][j] );
149 x264_free( frame->i_row_bits );
150 x264_free( frame->i_row_qp );
151 x264_free( frame->mb_type );
152 x264_free( frame->mv[0] );
153 x264_free( frame->mv[1] );
154 x264_free( frame->ref[0] );
155 x264_free( frame->ref[1] );
156 x264_pthread_mutex_destroy( &frame->mutex );
157 x264_pthread_cond_destroy( &frame->cv );
/* Copy an input picture into an encoder frame: frame type, qp override and
 * pts are copied as-is, the pixel data goes through the colourspace
 * converter selected by the picture's colourspace (masked with
 * X264_CSP_MASK). */
161 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
163 int i_csp = src->img.i_csp & X264_CSP_MASK;
164 dst->i_type = src->i_type;
165 dst->i_qpplus1 = src->i_qpplus1;
166 dst->i_pts = src->i_pts;
/* colourspaces outside (X264_CSP_NONE, X264_CSP_MAX) are reported as an error.
 * NOTE(review): the convert call below is presumably the else-branch of this
 * check (the joining line is not visible here) - confirm, otherwise an
 * invalid i_csp would index convert[] out of range. */
168 if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
169 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
171 h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
/* Replicate the edge pixels of a plane into the padding around it.
 *
 * pix points at the top-left pixel of an i_width x i_height area with line
 * pitch i_stride.  i_padh bytes are written on the left and right of every
 * row, and i_padv padded rows are written above and below the area.
 * NOTE(review): b_pad_top / b_pad_bottom presumably enable the upper and
 * lower passes respectively; the guarding lines are not visible here. */
176 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
/* address of pixel (x, y) relative to pix; negative coordinates reach into the padding */
178 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
180 for( y = 0; y < i_height; y++ )
/* left padding: repeat the first pixel of the row */
183 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right padding: repeat the last pixel of the row */
185 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* rows above the area: copies of the first row, including its side padding */
189 for( y = 0; y < i_padv; y++ )
190 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* rows below the area: copies of the last row, including its side padding */
193 for( y = 0; y < i_padv; y++ )
194 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Expand the borders of every plane of `frame` for macroblock row mb_y.
 * When b_end is set the area runs to the bottom of the picture, otherwise it
 * is one macroblock row high.
 * NOTE(review): b_start is declared on a line not visible here (presumably
 * true for the first row); the body of the mb_y/b_mbaff check below is not
 * visible either. */
198 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
202 if( mb_y & h->sh.b_mbaff )
204 for( i = 0; i < frame->i_plane; i++ )
/* `>> !!i` halves a value for every plane other than plane 0 */
206 int stride = frame->i_stride[i];
207 int width = 16*h->sps->i_mb_width >> !!i;
/* b_end: all remaining macroblock rows (shifted right by b_mbaff); else 16 lines */
208 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
209 int padh = PADH >> !!i;
210 int padv = PADV >> !!i;
211 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
/* start 4 luma lines above the macroblock row, clamped to the plane start */
212 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
/* last call on a row other than the first: include the extra lines above */
213 if( b_end && !b_start )
214 height += 4 >> (!!i + h->sh.b_mbaff);
/* with a doubled stride, the calls starting at pix and pix+stride each cover
 * every second line; the single call below covers all lines.
 * NOTE(review): the condition choosing between them is not visible
 * (presumably h->sh.b_mbaff). */
217 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
218 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
222 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of the three filtered planes (filtered[1..3]) for
 * macroblock row mb_y.  The area is 16 pixels wider than the macroblock grid
 * and starts 8 pixels to the left of it.
 * NOTE(review): padh, padv and b_start are declared on lines not visible
 * here. */
227 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
229 /* during filtering, 8 extra pixels were filtered on each edge.
230 we want to expand border from the last filtered pixel */
232 int stride = frame->i_stride[0];
233 int width = 16*h->sps->i_mb_width + 16;
/* b_end: all remaining rows (shifted right by b_mbaff) plus 16 lines; else 16 lines */
234 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
238 for( i = 1; i < 4; i++ )
240 // buffer: 8 luma, to match the hpel filter
/* start 8 lines (8 << b_mbaff) above the row and 8 pixels to the left */
241 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
/* doubled stride: the two calls each cover every second line; the single
 * call below covers all lines (selecting condition not visible here) */
244 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
245 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
249 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
254 void x264_frame_expand_border_lowres( x264_frame_t *frame )
257 for( i = 0; i < 4; i++ )
258 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad every plane of `frame` from the configured picture size
 * (h->param.i_width x h->param.i_height) out to the macroblock grid
 * (16 * i_mb_width x 16 * i_mb_height).  Planes other than plane 0 use all
 * sizes shifted right by one.
 * NOTE(review): the conditions guarding the two passes and the size
 * arguments of memset/memcpy are on lines not visible here (presumably
 * i_padx and i_width + i_padx). */
261 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
264 for( i = 0; i < frame->i_plane; i++ )
266 int i_subsample = i ? 1 : 0;
267 int i_width = h->param.i_width >> i_subsample;
268 int i_height = h->param.i_height >> i_subsample;
/* number of missing columns / rows up to the macroblock grid */
269 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
270 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* right side: fill from column i_width with the last real pixel of each row */
274 for( y = 0; y < i_height; y++ )
275 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
276 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
281 //FIXME interlace? or just let it pad using the wrong field
/* bottom: copy the last real row into each of the i_pady rows below it */
282 for( y = i_height; y < i_height + i_pady; y++ )
283 memcpy( &frame->plane[i][y*frame->i_stride[i]],
284 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
291 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
292 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* For every macroblock of row mb_y: save the first 16 non-zero-count bytes
 * into buf, then rewrite each of the first four 32-bit words (4 counts each)
 * to 0x01010101 when any of its bytes is non-zero.
 * NOTE(review): the rewrite is presumably applied only where transform[x] is
 * set; the guarding line is not visible here. */
293 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
/* view the per-macroblock counts as six 32-bit words */
295 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
296 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
298 for( x=0; x<h->sps->i_mb_width; x++ )
300 memcpy( buf+x, src+x, 16 );
303 if( src[x][0] ) src[x][0] = 0x01010101;
304 if( src[x][1] ) src[x][1] = 0x01010101;
305 if( src[x][2] ) src[x][2] = 0x01010101;
306 if( src[x][3] ) src[x][3] = 0x01010101;
311 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
313 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
315 for( x=0; x<h->sps->i_mb_width; x++ )
316 memcpy( dst+x, buf+x, 16 );
/* Apply `func` (a per-row save or restore routine) to row mb_y and to the
 * neighbouring rows mb_y-1, mb_y+1 and mb_y-2.  Each row gets its own slice
 * of buf, i_mb_width entries apart.
 * NOTE(review): the conditions deciding which neighbouring rows are
 * processed are on lines not visible here. */
319 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
321 func( h, mb_y, buf );
323 func( h, mb_y-1, buf + h->sps->i_mb_width );
326 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
328 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
333 /* Deblocking filter */
/* Alpha threshold, indexed by qp + alpha offset clipped to 0..51
 * (see deblock_edge).  Only the first 50 of the 52 entries are visible in
 * this view. */
335 static const int i_alpha_table[52] =
337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
338 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
339 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
340 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
341 80, 90,101,113,127,144,162,182,203,226,
/* Beta threshold, indexed by qp + beta offset clipped to 0..51
 * (see deblock_edge).  Only the first 50 of the 52 entries are visible in
 * this view. */
344 static const int i_beta_table[52] =
346 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
348 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
349 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
350 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* Clipping value tc0, indexed by [qp + alpha offset clipped to 0..51]
 * [bS - 1]; deblock_edge reads it only for non-zero bS. */
353 static const int i_tc0_table[52][3] =
355 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
356 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
357 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
358 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
359 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
360 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
361 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
362 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
363 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* NOTE(review): the body is not visible in this view.  Going by the name and
 * its use on filtered pixel values in deblock_luma_c / deblock_chroma_c it
 * presumably clamps `a` to the 0..255 range - confirm. */
367 static inline int clip_uint8( int a )
/* Luma edge filter with per-segment clipping.
 *
 * pix points at the first q0 sample of the edge.  xstride is the step across
 * the edge (p samples at negative multiples, q samples at non-negative
 * ones); ystride is presumably the step along the edge (the pointer advance
 * is on lines not visible here).  The edge is handled as 4 segments of 4
 * positions, with tc0[i] the clipping value of segment i.
 * NOTE(review): the derivation of `tc` and any early skip of a segment are
 * on lines not visible here. */
375 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
378 for( i = 0; i < 4; i++ ) {
383 for( d = 0; d < 4; d++ ) {
/* three samples on each side of the edge */
384 const int p2 = pix[-3*xstride];
385 const int p1 = pix[-2*xstride];
386 const int p0 = pix[-1*xstride];
387 const int q0 = pix[ 0*xstride];
388 const int q1 = pix[ 1*xstride];
389 const int q2 = pix[ 2*xstride];
/* filter only when the step across the edge and both side gradients are
 * below the thresholds */
391 if( abs( p0 - q0 ) < alpha &&
392 abs( p1 - p0 ) < beta &&
393 abs( q1 - q0 ) < beta ) {
/* p side is smooth: also adjust p1, limited to +/- tc0[i] */
398 if( abs( p2 - p0 ) < beta ) {
399 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
/* q side is smooth: also adjust q1, limited to +/- tc0[i] */
402 if( abs( q2 - q0 ) < beta ) {
403 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* move p0 and q0 towards each other by a delta limited to +/- tc */
407 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
408 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
409 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* "v" variant: samples across the edge are `stride` bytes apart (vertical
 * neighbours); the second stride argument is 1. */
415 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
417 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* "h" variant: samples across the edge are 1 byte apart (horizontal
 * neighbours); the second stride argument is `stride`. */
419 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
421 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Chroma edge filter with per-segment clipping: 4 segments of 2 positions,
 * using two samples on each side of the edge and modifying only p0 and q0.
 * xstride is the step across the edge; ystride is presumably the step along
 * it (the pointer advance is on lines not visible here). */
424 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
427 for( i = 0; i < 4; i++ ) {
/* clipping value of this segment */
428 const int tc = tc0[i];
433 for( d = 0; d < 2; d++ ) {
434 const int p1 = pix[-2*xstride];
435 const int p0 = pix[-1*xstride];
436 const int q0 = pix[ 0*xstride];
437 const int q1 = pix[ 1*xstride];
/* same activity test as for luma */
439 if( abs( p0 - q0 ) < alpha &&
440 abs( p1 - p0 ) < beta &&
441 abs( q1 - q0 ) < beta ) {
/* move p0 and q0 towards each other by a delta limited to +/- tc */
443 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
444 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
445 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* "v" variant: samples across the edge are `stride` bytes apart. */
451 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
453 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* "h" variant: samples across the edge are 1 byte apart. */
455 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
457 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Luma edge filter without clipping values, applied at 16 positions.
 * xstride is the step across the edge; ystride is presumably the step along
 * it (the pointer advance is on lines not visible here).
 * NOTE(review): the else-branches linking the assignments below are on lines
 * not visible here; the branch comments describe the presumed structure. */
460 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
463 for( d = 0; d < 16; d++ ) {
464 const int p2 = pix[-3*xstride];
465 const int p1 = pix[-2*xstride];
466 const int p0 = pix[-1*xstride];
467 const int q0 = pix[ 0*xstride];
468 const int q1 = pix[ 1*xstride];
469 const int q2 = pix[ 2*xstride];
/* filter only when the step across the edge and both side gradients are
 * below the thresholds */
471 if( abs( p0 - q0 ) < alpha &&
472 abs( p1 - p0 ) < beta &&
473 abs( q1 - q0 ) < beta ) {
/* small step across the edge: each side may get the longer filter */
475 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
/* p side smooth: rewrite p0, p1 and p2 using p3..q1 */
476 if( abs( p2 - p0 ) < beta)
478 const int p3 = pix[-4*xstride];
480 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
481 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
482 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* presumably the p-side fallback: rewrite p0 only */
485 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side smooth: rewrite q0, q1 and q2 using p1..q3 */
487 if( abs( q2 - q0 ) < beta)
489 const int q3 = pix[3*xstride];
491 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
492 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
493 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* presumably the q-side fallback: rewrite q0 only */
496 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* presumably the branch for a larger step: rewrite p0 and q0 only */
500 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
501 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* "v" variant: samples across the edge are `stride` bytes apart. */
507 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
509 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* "h" variant: samples across the edge are 1 byte apart. */
511 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
513 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Chroma edge filter without clipping values, applied at 8 positions; only
 * p0 and q0 are rewritten.  xstride is the step across the edge; ystride is
 * presumably the step along it (the pointer advance is on a line not visible
 * here). */
516 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
519 for( d = 0; d < 8; d++ ) {
520 const int p1 = pix[-2*xstride];
521 const int p0 = pix[-1*xstride];
522 const int q0 = pix[ 0*xstride];
523 const int q1 = pix[ 1*xstride];
/* same activity test as the other filters */
525 if( abs( p0 - q0 ) < alpha &&
526 abs( p1 - p0 ) < beta &&
527 abs( q1 - q0 ) < beta ) {
529 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
530 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* "v" variant: samples across the edge are `stride` bytes apart. */
536 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
538 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* "h" variant: samples across the edge are 1 byte apart. */
540 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
542 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one edge with either pf_inter (given per-segment clipping values)
 * or pf_intra.
 *
 * alpha and beta are looked up from i_qp plus the slice offsets, clipped to
 * 0..51.  bS holds the filtering strength of each of the 4 segments.
 * NOTE(review): the condition selecting pf_inter or pf_intra is on a line
 * not visible here (presumably bS[0] < 4). */
545 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
546 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
549 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
550 const int alpha = i_alpha_table[index_a];
551 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
/* per segment: the table value for bS 1..3, or -1 for bS 0, plus b_chroma */
556 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
557 pf_inter( pix, i_stride, alpha, beta, tc );
559 pf_intra( pix, i_stride, alpha, beta );
/* Deblock macroblock row mb_y of the frame h->fdec.
 *
 * For every macroblock the vertical edges (i_dir 0) are filtered first, then
 * the horizontal ones (i_dir 1); for each edge a strength bS is derived per
 * 4-pixel segment and luma and both chroma planes are filtered through
 * deblock_edge().  With CAVLC and the 8x8 transform enabled, the non-zero
 * counts are munged before the row is processed and restored afterwards.
 * NOTE(review): many short lines (else branches, assignments, continue
 * statements, braces) are not visible in this view; comments marked
 * "presumably" describe structure that cannot be confirmed from here. */
563 void x264_frame_deblock_row( x264_t *h, int mb_y )
565 const int s8x8 = 2 * h->mb.i_mb_stride;
566 const int s4x4 = 4 * h->mb.i_mb_stride;
567 const int b_interlaced = h->sh.b_mbaff;
/* vertical mv difference threshold: 4, or 2 when b_interlaced is 1 */
568 const int mvy_limit = 4 >> b_interlaced;
/* line pitch used for filtering: doubled when b_interlaced is 1 */
571 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
572 h->fdec->i_stride[1] << b_interlaced,
573 h->fdec->i_stride[2] << b_interlaced };
575 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
576 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
/* the loop header has no increment; mb_x is advanced inside the body */
578 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
580 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
581 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
582 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
583 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP macroblocks: only edge 0 is considered */
584 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
/* offset of the macroblock's top-left pixel in each plane
 * (16x16 in plane 0, 8x8 in planes 1 and 2) */
587 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
588 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
589 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* odd row under mbaff: step back 15 luma / 7 chroma lines, i.e. to the
 * second line of the macroblock row above */
590 if( b_interlaced && (mb_y&1) )
592 i_pix_y[0] -= 15*h->fdec->i_stride[0];
593 i_pix_y[1] -= 7*h->fdec->i_stride[1];
594 i_pix_y[2] -= 7*h->fdec->i_stride[2];
597 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
599 /* i_dir == 0 -> vertical edge
600 * i_dir == 1 -> horizontal edge */
601 for( i_dir = 0; i_dir < 2; i_dir++ )
/* start at edge 1 instead of 0 when there is no neighbour: first column for
 * vertical edges, first row (first two rows under mbaff) for horizontal ones */
603 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
606 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
608 int mbn_xy, mbn_8x8, mbn_4x4;
609 int bS[4]; /* filtering strength */
/* odd edges of an 8x8-transform macroblock (presumably skipped; the body is
 * not visible here) */
611 if( b_8x8_transform && (i_edge&1) )
/* neighbour indices: the macroblock itself for inner edges, the left or the
 * upper macroblock for edge 0 */
614 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
615 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
616 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
/* under mbaff the upper neighbour is one further row up */
618 if( b_interlaced && i_edge == 0 && i_dir == 1 )
620 mbn_xy -= h->mb.i_mb_stride;
625 /* *** Get bS for each 4px for the current edge *** */
/* either side intra: strength 4 on edge 0 (except horizontal edges under
 * mbaff), otherwise 3 */
626 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
628 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
633 for( i = 0; i < 4; i++ )
/* (x, y): 4x4 block on this side of the edge; (xn, yn): the block on the
 * other side, wrapped into 0..3 */
635 int x = i_dir == 0 ? i_edge : i;
636 int y = i_dir == 0 ? i : i_edge;
637 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
638 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
/* coefficients present on either side (resulting strength not visible here) */
640 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
641 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
647 /* FIXME: A given frame may occupy more than one position in
648 * the reference list. So we should compare the frame numbers,
649 * not the indices in the ref list.
650 * No harm yet, as we don't generate that case.*/
652 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
653 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
654 int i4p= mb_4x4+x+y*s4x4;
655 int i4q= mbn_4x4+xn+yn*s4x4;
/* list 0 always, list 1 as well in B slices: compare the reference index
 * and the motion vector components (thresholds 4 and mvy_limit) */
660 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
662 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
663 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
664 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
/* the filters use the rounded average of the qp on both sides of the edge */
676 i_qp = h->mb.qp[mb_xy];
677 i_qpn= h->mb.qp[mbn_xy];
/* vertical edge: the "h" filters, edge i_edge at column 4*i_edge (luma) /
 * 2*i_edge (chroma) */
682 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
683 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
684 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
/* chroma qp: rounded average of the table-mapped qp of both sides */
688 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
689 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
690 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
691 i_stride2[1], bS, i_qpc, 1,
692 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
693 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
694 i_stride2[2], bS, i_qpc, 1,
695 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
700 /* horizontal edge */
701 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
702 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
703 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
707 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
708 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
709 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
710 i_stride2[1], bS, i_qpc, 1,
711 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
712 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
713 i_stride2[2], bS, i_qpc, 1,
714 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
/* presumably advances mb_x here (body not visible); under mbaff mb_y is then
 * toggled between the two rows of the pair */
721 if( !b_interlaced || (mb_y&1) )
723 mb_y ^= b_interlaced;
726 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
727 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
730 void x264_frame_deblock( x264_t *h )
733 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
734 x264_frame_deblock_row( h, mb_y );
/* Prototypes of deblocking routines defined outside this file (MMXEXT and
 * SSE2 variants); x264_deblock_init installs them according to the cpu
 * flags.  NOTE(review): the preprocessor conditions around these
 * declarations are not visible here. */
738 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
739 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
740 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
741 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
744 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
745 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
747 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* filters 8 pixels; used twice by x264_deblock_v_luma_mmxext */
748 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Filter a 16-pixel luma edge as two 8-pixel halves: each half starts
 * 8 pixels further along and uses the next two tc0 entries. */
void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    int half;
    for( half = 0; half < 2; half++ )
        x264_deblock_v8_luma_mmxext( pix + 8*half, stride, alpha, beta, tc0 + 2*half );
}
/* Prototypes of the AltiVec luma deblocking routines, defined outside this
 * file; x264_deblock_init installs them when X264_CPU_ALTIVEC is set. */
759 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
760 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblocking function table `pf`: start with the C
 * implementations, then override entries with optimised versions according
 * to the `cpu` flag bits.
 * NOTE(review): the preprocessor guards around the overrides, and the
 * condition separating the SSE2 from the MMXEXT luma assignments, are on
 * lines not visible here. */
763 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
/* portable defaults */
765 pf->deblock_v_luma = deblock_v_luma_c;
766 pf->deblock_h_luma = deblock_h_luma_c;
767 pf->deblock_v_chroma = deblock_v_chroma_c;
768 pf->deblock_h_chroma = deblock_h_chroma_c;
769 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
770 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
771 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
772 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
/* MMXEXT: all four chroma filters */
775 if( cpu&X264_CPU_MMXEXT )
777 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
778 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
779 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
780 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
/* luma filters: SSE2 versions, and MMXEXT versions in a branch whose
 * condition is not visible here */
783 if( cpu&X264_CPU_SSE2 )
785 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
786 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
789 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
790 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
/* AltiVec: luma filters */
796 if( cpu&X264_CPU_ALTIVEC )
798 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
799 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish progress on `frame`: store i_lines_completed under the frame's
 * mutex and wake every thread waiting on the frame's condition variable
 * (see x264_frame_cond_wait). */
808 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
810 x264_pthread_mutex_lock( &frame->mutex );
811 frame->i_lines_completed = i_lines_completed;
812 x264_pthread_cond_broadcast( &frame->cv );
813 x264_pthread_mutex_unlock( &frame->mutex );
816 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
818 x264_pthread_mutex_lock( &frame->mutex );
819 while( frame->i_lines_completed < i_lines_completed )
820 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
821 x264_pthread_mutex_unlock( &frame->mutex );
/* Second definition of x264_frame_cond_broadcast.
 * NOTE(review): its body and the preprocessor lines selecting between the
 * two definitions are not visible here; presumably this is the variant used
 * when threading is unavailable - confirm. */
825 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
/* Second definition of x264_frame_cond_wait.
 * NOTE(review): its body and the preprocessor lines selecting between the
 * two definitions are not visible here; presumably this is the variant used
 * when threading is unavailable - confirm. */
827 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
/* Append `frame` to a NULL-terminated frame list.
 * NOTE(review): only the scan is visible; the store into the found slot is
 * presumably on the following line. */
834 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
/* advance i to the first NULL slot */
837 while( list[i] ) i++;
/* Remove and return the last frame of a NULL-terminated frame list.
 * NOTE(review): only the scan is visible; taking the entry and clearing the
 * slot are presumably on the lines not shown. */
841 x264_frame_t *x264_frame_pop( x264_frame_t **list )
/* advance i to the last non-NULL entry */
846 while( list[i+1] ) i++;
/* Insert `frame` into a NULL-terminated frame list.
 * NOTE(review): only the scan for the end of the list is visible; going by
 * the name, the entries are presumably moved up by one and `frame` is stored
 * at index 0 - confirm. */
852 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
/* advance i to the first NULL slot */
855 while( list[i] ) i++;
/* Take the first frame of a NULL-terminated frame list.
 * NOTE(review): the loop body and the return are not visible; presumably
 * each entry is replaced by its successor and the saved first frame is
 * returned - confirm. */
861 x264_frame_t *x264_frame_shift( x264_frame_t **list )
863 x264_frame_t *frame = list[0];
865 for( i = 0; list[i]; i++ )
871 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
873 assert( frame->i_reference_count > 0 );
874 frame->i_reference_count--;
875 if( frame->i_reference_count == 0 )
876 x264_frame_push( h->frames.unused, frame );
877 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
880 x264_frame_t *x264_frame_pop_unused( x264_t *h )
883 if( h->frames.unused[0] )
884 frame = x264_frame_pop( h->frames.unused );
886 frame = x264_frame_new( h );
887 assert( frame->i_reference_count == 0 );
888 frame->i_reference_count = 1;
/* Sort a NULL-terminated frame list by swapping adjacent entries that are
 * out of order.  With b_dts set, frames are ordered by i_type first and by
 * i_frame within the same type.
 * NOTE(review): the other branch of the ordering expression, the swap
 * condition and the outer repeat loop are on lines not visible here, and the
 * definition runs past the end of this view. */
892 void x264_frame_sort( x264_frame_t **list, int b_dts )
/* one pass over all adjacent pairs */
897 for( i = 0; list[i+1]; i++ )
899 int dtype = list[i]->i_type - list[i+1]->i_type;
900 int dtime = list[i]->i_frame - list[i+1]->i_frame;
901 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
905 XCHG( x264_frame_t*, list[i], list[i+1] );