1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Allocate a frame and everything hanging off it: the three image planes,
 * three extra luma-sized "filtered" planes, optional lowres and integral
 * buffers, and the per-macroblock / per-row arrays. Sizes come from h->param
 * and h->mb.i_mb_count. Returns NULL when the x264_frame_t itself cannot be
 * allocated.
 * NOTE(review): the trailing x264_frame_delete( frame ) is presumably the
 * CHECKED_MALLOC failure path -- confirm against the macro definition. */
29 x264_frame_t *x264_frame_new( x264_t *h )
31 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
34 int i_mb_count = h->mb.i_mb_count;
/* vertical padding, shifted left by b_interlaced (doubled when it is 1) */
37 int i_padv = PADV << h->param.b_interlaced;
39 if( !frame ) return NULL;
/* start from an all-zero struct so that unallocated pointers are NULL */
41 memset( frame, 0, sizeof(x264_frame_t) );
43 /* allocate frame data (+64 for extra data for me) */
/* width and height rounded up to a multiple of 16; the stride also carries
 * 2*PADH of horizontal padding */
44 i_stride = ( ( h->param.i_width + 15 ) & -16 )+ 2*PADH;
45 i_lines = ( ( h->param.i_height + 15 ) & -16 );
/* interlaced: round the line count up to a multiple of 32 instead */
46 if( h->param.b_interlaced )
47 i_lines = ( i_lines + 31 ) & -32;
/* the three image planes; i_divw / i_divh divide the luma stride and line
 * count to get the size of plane i */
50 for( i = 0; i < 3; i++ )
56 if( h->param.i_csp == X264_CSP_I420 )
58 else if( h->param.i_csp == X264_CSP_I422 )
61 frame->i_stride[i] = i_stride / i_divw;
62 frame->i_lines[i] = i_lines / i_divh;
/* buffer holds the plane plus (scaled) vertical padding above and below */
63 CHECKED_MALLOC( frame->buffer[i],
64 frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
/* plane[i] points at the first real pixel, past the top and left padding */
66 frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
67 frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
/* the fourth plane slot is left empty */
69 frame->i_stride[3] = 0;
70 frame->i_lines[3] = 0;
71 frame->buffer[3] = NULL;
72 frame->plane[3] = NULL;
/* filtered[0] aliases the luma plane; filtered[1..3] get their own
 * luma-sized buffers in buffer[4..6] */
74 frame->filtered[0] = frame->plane[0];
75 for( i = 0; i < 3; i++ )
77 CHECKED_MALLOC( frame->buffer[4+i],
78 frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
79 frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
80 frame->i_stride[0] * i_padv + PADH;
/* four lowres planes: half the luma stride plus PADH, half the luma lines */
83 if( h->frames.b_have_lowres )
85 frame->i_stride_lowres = frame->i_stride[0]/2 + PADH;
86 frame->i_lines_lowres = frame->i_lines[0]/2;
87 for( i = 0; i < 4; i++ )
89 CHECKED_MALLOC( frame->buffer_lowres[i],
90 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
91 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
92 frame->i_stride_lowres * i_padv + PADH;
/* ESA motion search: uint16_t integral buffer stored in buffer[7] */
96 if( h->param.analyse.i_me_method == X264_ME_ESA )
98 CHECKED_MALLOC( frame->buffer[7],
99 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
100 frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
/* initial per-frame state */
104 frame->i_type = X264_TYPE_AUTO;
105 frame->i_qpplus1 = 0;
108 frame->i_frame_num = -1;
109 frame->i_lines_completed = -1;
/* per-macroblock arrays: type, 16 motion vectors (2 components) and 4 refs */
111 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
112 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
113 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
/* the second mv/ref set is only allocated when i_bframe is nonzero */
114 if( h->param.i_bframe )
116 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
117 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
122 frame->ref[1] = NULL;
/* per-row arrays: one int per 16 lines */
125 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
126 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
127 for( i = 0; i < h->param.i_bframe + 2; i++ )
128 for( j = 0; j < h->param.i_bframe + 2; j++ )
129 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
/* synchronisation objects used by x264_frame_cond_broadcast / _wait */
131 pthread_mutex_init( &frame->mutex, NULL );
132 pthread_cond_init( &frame->cv, NULL );
137 x264_frame_delete( frame );
/* Release the buffers owned by a frame: the 8 plane/filtered/integral
 * buffers, the 4 lowres buffers, every i_row_satds slot up to
 * X264_BFRAME_MAX+2 in both dimensions, the per-row and per-macroblock
 * arrays, and finally the mutex and condition variable. */
141 void x264_frame_delete( x264_frame_t *frame )
144 for( i = 0; i < 8; i++ )
145 x264_free( frame->buffer[i] );
146 for( i = 0; i < 4; i++ )
147 x264_free( frame->buffer_lowres[i] );
/* iterates over the full table, not just the i_bframe+2 entries that
 * x264_frame_new allocates */
148 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
149 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
150 x264_free( frame->i_row_satds[i][j] );
151 x264_free( frame->i_row_bits );
152 x264_free( frame->i_row_qp );
153 x264_free( frame->mb_type );
154 x264_free( frame->mv[0] );
155 x264_free( frame->mv[1] );
156 x264_free( frame->ref[0] );
157 x264_free( frame->ref[1] );
158 pthread_mutex_destroy( &frame->mutex );
159 pthread_cond_destroy( &frame->cv );
/* Copy an input picture into an encoder frame: type, qpplus1 and pts are
 * copied directly, and the pixel data goes through the colourspace converter
 * selected by the picture's csp (masked with X264_CSP_MASK). */
163 void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
165 int i_csp = src->img.i_csp & X264_CSP_MASK;
166 dst->i_type = src->i_type;
167 dst->i_qpplus1 = src->i_qpplus1;
168 dst->i_pts = src->i_pts;
/* a csp outside (X264_CSP_NONE, X264_CSP_MAX) is reported as an error.
 * NOTE(review): confirm the convert[] call below is not reached with such
 * an out-of-range index. */
170 if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
171 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
173 h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
/* Replicate the edge pixels of a plane into its padding.
 * pix points at the top-left real pixel; i_padh / i_padv are the padding
 * widths to fill horizontally and vertically.
 * NOTE(review): b_pad_top / b_pad_bottom presumably gate the two vertical
 * loops -- confirm. */
178 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
/* address of pixel (x, y) relative to pix; negative coordinates reach into
 * the padding */
180 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
182 for( y = 0; y < i_height; y++ )
/* left padding: repeat the first pixel of the row */
185 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right padding: repeat the last pixel of the row */
187 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* rows above the plane: copies of row 0, horizontal padding included */
191 for( y = 0; y < i_padv; y++ )
192 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* rows below the plane: copies of the last row, horizontal padding included */
195 for( y = 0; y < i_padv; y++ )
196 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Expand the borders of every plane of a frame for the macroblock row mb_y.
 * b_end selects the final call, which covers all remaining rows down to the
 * bottom of the picture. */
200 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
/* true only for an odd mb_y while b_mbaff is set */
204 if( mb_y & h->sh.b_mbaff )
206 for( i = 0; i < frame->i_plane; i++ )
/* every plane other than plane 0 uses halved dimensions (>> !!i) */
208 int stride = frame->i_stride[i];
209 int width = 16*h->sps->i_mb_width >> !!i;
210 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
211 int padh = PADH >> !!i;
212 int padv = PADV >> !!i;
213 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
/* start 4 luma rows above the macroblock row, clamped to the top of the plane */
214 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
215 if( b_end && !b_start )
216 height += 4 >> (!!i + h->sh.b_mbaff);
/* two passes with a doubled stride, starting at pix and pix+stride: even and
 * odd rows are padded as two separate planes */
219 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
220 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single pass over all rows */
224 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of the three filtered planes (filtered[1..3]) for the
 * macroblock row mb_y; filtered[0] is not touched here. */
229 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
231 /* during filtering, 8 extra pixels were filtered on each edge.
232 we want to expand border from the last filtered pixel */
234 int stride = frame->i_stride[0];
/* 16 extra columns and, on the final call, 16 extra rows: 8 per edge */
235 int width = 16*h->sps->i_mb_width + 16;
236 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
240 for( i = 1; i < 4; i++ )
242 // buffer: 8 luma, to match the hpel filter
/* start 8 columns to the left and 8 rows (16 under mbaff) above the row */
243 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
/* two passes with a doubled stride: even and odd rows padded separately */
246 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
247 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single pass over all rows */
251 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of all four lowres planes, padding top and bottom
 * (both flags set). The plane width is the lowres stride minus 2*PADH. */
256 void x264_frame_expand_border_lowres( x264_frame_t *frame )
259 for( i = 0; i < 4; i++ )
260 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad each plane on the right and at the bottom from the picture size in
 * h->param up to the macroblock-aligned size (i_mb_width*16 x i_mb_height*16),
 * by repeating the last column and the last row. */
263 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
266 for( i = 0; i < frame->i_plane; i++ )
/* planes other than plane 0 are handled at half resolution */
268 int i_subsample = i ? 1 : 0;
269 int i_width = h->param.i_width >> i_subsample;
270 int i_height = h->param.i_height >> i_subsample;
/* number of missing columns / rows in this plane */
271 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
272 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* right side: fill from column i_width with the row's last real pixel */
276 for( y = 0; y < i_height; y++ )
277 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
278 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
283 //FIXME interlace? or just let it pad using the wrong field
/* bottom: every added row is a copy of row i_height-1 */
284 for( y = i_height; y < i_height + i_pady; y++ )
285 memcpy( &frame->plane[i][y*frame->i_stride[i]],
286 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
293 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
294 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* For macroblock row mb_y: save the first 16 non_zero_count bytes of every
 * macroblock into buf, then rewrite each nonzero group of four bytes as
 * 0x01010101.
 * NOTE(review): `transform` suggests the rewrite applies only to
 * macroblocks using the 8x8 transform -- confirm. */
295 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
/* the per-macroblock count array is viewed as six 32-bit words */
297 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
298 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
300 for( x=0; x<h->sps->i_mb_width; x++ )
/* back up the 16 bytes that the rewrite below may change */
302 memcpy( buf+x, src+x, 16 );
/* any nonzero byte in a word marks all four of its bytes as nonzero */
305 if( src[x][0] ) src[x][0] = 0x01010101;
306 if( src[x][1] ) src[x][1] = 0x01010101;
307 if( src[x][2] ) src[x][2] = 0x01010101;
308 if( src[x][3] ) src[x][3] = 0x01010101;
/* Copy the 16 saved bytes per macroblock from buf back into
 * h->mb.non_zero_count for macroblock row mb_y (the reverse of the backup
 * made by munge_cavlc_nnz_row). */
313 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
315 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
317 for( x=0; x<h->sps->i_mb_width; x++ )
318 memcpy( dst+x, buf+x, 16 );
/* Apply func (munge or restore) to row mb_y and to neighbouring rows
 * mb_y-1, mb_y+1 and mb_y-2. Each row uses its own i_mb_width-sized slice
 * of buf.
 * NOTE(review): the neighbouring-row calls are presumably conditional
 * (row bounds / mbaff) -- confirm. */
321 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
323 func( h, mb_y, buf );
325 func( h, mb_y-1, buf + h->sps->i_mb_width );
328 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
330 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
335 /* Deblocking filter */
/* Alpha threshold of the deblocking filter. deblock_edge indexes it with
 * qp + i_alpha_c0_offset clipped to 0..51. */
337 static const int i_alpha_table[52] =
339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
340 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
341 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
342 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
343 80, 90,101,113,127,144,162,182,203,226,
/* Beta threshold of the deblocking filter. deblock_edge indexes it with
 * qp + i_beta_offset clipped to 0..51. */
346 static const int i_beta_table[52] =
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
350 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
351 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
352 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* Clipping value tc0 of the inter deblocking filter. The first index is the
 * same clipped alpha index used for i_alpha_table; the second is bS-1, as
 * used by deblock_edge. */
355 static const int i_tc0_table[52][3] =
357 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
358 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
359 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
360 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
361 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
362 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
363 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
364 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
365 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* Used by the inter deblocking filters on p0 + delta / q0 - delta before the
 * result is stored into a uint8_t pixel.
 * NOTE(review): by its name this presumably clamps a to 0..255 -- confirm
 * against the body. */
369 static inline int clip_uint8( int a )
/* Inter (bS < 4) luma deblocking of one edge: 4 groups of 4 lines, with one
 * tc0 value per group.
 * xstride is the distance between successive samples across the edge
 * (p2 p1 p0 | q0 q1 q2). ystride is presumably the step from one line to the
 * next along the edge -- confirm.
 * alpha / beta are the edge-activity thresholds. */
377 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
380 for( i = 0; i < 4; i++ ) {
385 for( d = 0; d < 4; d++ ) {
386 const int p2 = pix[-3*xstride];
387 const int p1 = pix[-2*xstride];
388 const int p0 = pix[-1*xstride];
389 const int q0 = pix[ 0*xstride];
390 const int q1 = pix[ 1*xstride];
391 const int q2 = pix[ 2*xstride];
/* filter only where the step across the edge and the gradients on both
 * sides are below the thresholds */
393 if( abs( p0 - q0 ) < alpha &&
394 abs( p1 - p0 ) < beta &&
395 abs( q1 - q0 ) < beta ) {
/* smooth p side: p1 is also adjusted, by at most +-tc0[i] */
400 if( abs( p2 - p0 ) < beta ) {
401 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
/* smooth q side: q1 is also adjusted, by at most +-tc0[i] */
404 if( abs( q2 - q0 ) < beta ) {
405 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* p0 and q0 move towards each other by delta, limited to +-tc */
409 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
410 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
411 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Inter luma filter with the taps spaced `stride` apart
 * (xstride = stride, ystride = 1). */
417 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
419 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Inter luma filter with the taps on adjacent bytes
 * (xstride = 1, ystride = stride). */
421 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
423 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Inter (bS < 4) chroma deblocking of one edge: 4 groups of 2 lines, with
 * one tc0 value per group. Only p0 and q0 are modified.
 * xstride is the distance between successive samples across the edge.
 * ystride is presumably the step along the edge -- confirm. */
426 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
429 for( i = 0; i < 4; i++ ) {
430 const int tc = tc0[i];
435 for( d = 0; d < 2; d++ ) {
436 const int p1 = pix[-2*xstride];
437 const int p0 = pix[-1*xstride];
438 const int q0 = pix[ 0*xstride];
439 const int q1 = pix[ 1*xstride];
/* same activity test as the luma filter */
441 if( abs( p0 - q0 ) < alpha &&
442 abs( p1 - p0 ) < beta &&
443 abs( q1 - q0 ) < beta ) {
/* p0 and q0 move towards each other by delta, limited to +-tc */
445 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
446 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
447 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Inter chroma filter with the taps spaced `stride` apart
 * (xstride = stride, ystride = 1). */
453 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
455 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* Inter chroma filter with the taps on adjacent bytes
 * (xstride = 1, ystride = stride). */
457 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
459 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Intra-strength luma deblocking of one edge, 16 lines. There is no tc0
 * clipping; the new pixel values are fixed weighted averages of the
 * neighbouring samples.
 * xstride is the distance between successive samples across the edge.
 * ystride is presumably the step along the edge -- confirm. */
462 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
465 for( d = 0; d < 16; d++ ) {
466 const int p2 = pix[-3*xstride];
467 const int p1 = pix[-2*xstride];
468 const int p0 = pix[-1*xstride];
469 const int q0 = pix[ 0*xstride];
470 const int q1 = pix[ 1*xstride];
471 const int q2 = pix[ 2*xstride];
/* same activity test as the inter filters */
473 if( abs( p0 - q0 ) < alpha &&
474 abs( p1 - p0 ) < beta &&
475 abs( q1 - q0 ) < beta ) {
/* small step across the edge: the stronger filter may be used */
477 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
/* p side: when it is smooth, p0, p1 and p2 are all rewritten */
478 if( abs( p2 - p0 ) < beta)
480 const int p3 = pix[-4*xstride];
482 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
483 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
484 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* 3-tap formula that changes only p0 */
487 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: mirror image of the p side */
489 if( abs( q2 - q0 ) < beta)
491 const int q3 = pix[3*xstride];
493 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
494 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
495 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* 3-tap formula that changes only q0 */
498 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* 3-tap formulas that change only p0 and q0 */
502 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
503 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Intra luma filter with the taps spaced `stride` apart
 * (xstride = stride, ystride = 1). */
509 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
511 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* Intra luma filter with the taps on adjacent bytes
 * (xstride = 1, ystride = stride). */
513 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
515 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Intra-strength chroma deblocking of one edge, 8 lines. Only p0 and q0 are
 * replaced, each by a 3-tap weighted average.
 * xstride is the distance between successive samples across the edge.
 * ystride is presumably the step along the edge -- confirm. */
518 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
521 for( d = 0; d < 8; d++ ) {
522 const int p1 = pix[-2*xstride];
523 const int p0 = pix[-1*xstride];
524 const int q0 = pix[ 0*xstride];
525 const int q1 = pix[ 1*xstride];
/* same activity test as the other filters */
527 if( abs( p0 - q0 ) < alpha &&
528 abs( p1 - p0 ) < beta &&
529 abs( q1 - q0 ) < beta ) {
531 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
532 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Intra chroma filter with the taps spaced `stride` apart
 * (xstride = stride, ystride = 1). */
538 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
540 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* Intra chroma filter with the taps on adjacent bytes
 * (xstride = 1, ystride = stride). */
542 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
544 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one 16-pixel luma (or 8-pixel chroma) edge.
 * Looks up alpha and beta for i_qp plus the slice offsets, derives the four
 * tc values from bS[], and dispatches to pf_inter or pf_intra.
 * NOTE(review): the choice between pf_inter and pf_intra presumably depends
 * on the bS value (intra strength 4) -- confirm. */
547 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
548 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
/* both table indices are clipped to the valid 0..51 range */
551 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
552 const int alpha = i_alpha_table[index_a];
553 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
/* tc base is -1 where bS[i] is 0, otherwise the table entry for that
 * strength; b_chroma is added in both cases */
558 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
559 pf_inter( pix, i_stride, alpha, beta, tc );
561 pf_intra( pix, i_stride, alpha, beta );
/* Deblock every macroblock of row mb_y in the reconstructed frame h->fdec.
 * For each macroblock and each direction, the boundary strength bS of every
 * 4-pixel segment of every edge is computed from macroblock types, coded
 * coefficients, references and motion vectors. The luma and both chroma
 * planes are then filtered through deblock_edge. */
565 void x264_frame_deblock_row( x264_t *h, int mb_y )
/* row strides of the 8x8-block and 4x4-block arrays (h->mb.ref / h->mb.mv) */
567 const int s8x8 = 2 * h->mb.i_mb_stride;
568 const int s4x4 = 4 * h->mb.i_mb_stride;
569 const int b_interlaced = h->sh.b_mbaff;
/* vertical mv difference threshold, halved when b_mbaff is set */
570 const int mvy_limit = 4 >> b_interlaced;
/* plane strides, doubled when b_mbaff is set */
573 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
574 h->fdec->i_stride[1] << b_interlaced,
575 h->fdec->i_stride[2] << b_interlaced };
/* CAVLC with the 8x8 transform: rewrite the nnz counts for deblocking; they
 * are restored at the end of this function */
577 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
578 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
580 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
/* indices of this macroblock in the per-MB, per-8x8 and per-4x4 arrays */
582 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
583 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
584 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
585 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP macroblocks: only edge 0 is considered */
586 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
/* offset of the macroblock's top-left pixel in each plane
 * (16x16 in plane 0, 8x8 in planes 1 and 2) */
589 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
590 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
591 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* odd row under mbaff: back up 15 (7) rows, i.e. to the line right after the
 * first line of the previous macroblock row */
592 if( b_interlaced && (mb_y&1) )
594 i_pix_y[0] -= 15*h->fdec->i_stride[0];
595 i_pix_y[1] -= 7*h->fdec->i_stride[1];
596 i_pix_y[2] -= 7*h->fdec->i_stride[2];
599 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
601 /* i_dir == 0 -> vertical edge
602 * i_dir == 1 -> horizontal edge */
603 for( i_dir = 0; i_dir < 2; i_dir++ )
/* skip edge 0 in the leftmost column (i_dir 0) and in the top row, or the
 * top two rows under mbaff (i_dir 1) */
605 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
608 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
610 int mbn_xy, mbn_8x8, mbn_4x4;
611 int bS[4]; /* filtering strength */
613 if( b_8x8_transform && (i_edge&1) )
/* neighbour across the edge: this macroblock for internal edges, the left
 * one for i_dir 0, the one above for i_dir 1 */
616 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
617 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
618 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
/* mbaff top edge: the neighbour lies one further macroblock row up */
620 if( b_interlaced && i_edge == 0 && i_dir == 1 )
622 mbn_xy -= h->mb.i_mb_stride;
627 /* *** Get bS for each 4px for the current edge *** */
628 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
/* intra on either side: 4 on a macroblock edge (but not for horizontal
 * edges under mbaff), 3 otherwise */
630 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
635 for( i = 0; i < 4; i++ )
/* (x, y): 4x4 block on this side of the edge; (xn, yn): the block on the
 * other side, wrapped modulo 4 into the neighbour */
637 int x = i_dir == 0 ? i_edge : i;
638 int y = i_dir == 0 ? i : i_edge;
639 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
640 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
/* coded coefficients on either side of the edge */
642 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
643 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
649 /* FIXME: A given frame may occupy more than one position in
650 * the reference list. So we should compare the frame numbers,
651 * not the indices in the ref list.
652 * No harm yet, as we don't generate that case.*/
/* positions of both blocks in the ref (8x8) and mv (4x4) arrays */
654 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
655 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
656 int i4p= mb_4x4+x+y*s4x4;
657 int i4q= mbn_4x4+xn+yn*s4x4;
/* one list, or two in B slices: a different reference, or an mv difference
 * of at least 4 horizontally / mvy_limit vertically */
662 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
664 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
665 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
666 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
/* quantisers on both sides; the filter uses their rounded average */
678 i_qp = h->mb.qp[mb_xy];
679 i_qpn= h->mb.qp[mbn_xy];
/* h-filters at a column offset: luma edges are 4 pixels apart */
684 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
685 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
686 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
/* chroma qp: average of the table-mapped qp of both macroblocks; the chroma
 * edge offset is 2 pixels per edge index */
690 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
691 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
692 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
693 i_stride2[1], bS, i_qpc, 1,
694 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
695 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
696 i_stride2[2], bS, i_qpc, 1,
697 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
702 /* horizontal edge */
703 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
704 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
705 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
/* chroma qp computed the same way as for the other direction */
709 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
710 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
711 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
712 i_stride2[1], bS, i_qpc, 1,
713 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
714 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
715 i_stride2[2], bS, i_qpc, 1,
716 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
/* under mbaff, mb_y ^= b_interlaced alternates between the two rows of a
 * macroblock pair; the test below is true once per pair, or always when not
 * interlaced */
723 if( !b_interlaced || (mb_y&1) )
725 mb_y ^= b_interlaced;
/* undo the nnz rewrite done at the top of the function */
728 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
729 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
/* Deblock the whole frame row by row. The row index advances by 2 when
 * b_mbaff is set, because x264_frame_deblock_row then handles both rows of a
 * macroblock pair itself. */
732 void x264_frame_deblock( x264_t *h )
735 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
736 x264_frame_deblock_row( h, mb_y );
/* Prototypes of CPU-specific deblocking routines defined outside this file
 * (presumably in assembly -- confirm). Their signatures match the C
 * reference filters. */
740 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
741 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
742 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
743 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
746 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
747 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
749 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
750 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* 16-pixel luma filter built from two calls to the 8-pixel routine. The
 * second call starts 8 pixels further along and uses the last two tc0
 * entries (tc0+2). */
752 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
754 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
755 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* Prototypes of the AltiVec luma deblocking routines, defined outside this
 * file. */
761 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
762 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblocking function table pf. The C reference implementations
 * are installed first, then replaced by CPU-specific versions according to
 * the flags in cpu.
 * NOTE(review): the overrides are presumably also guarded by architecture
 * preprocessor conditionals -- confirm. */
765 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
/* portable defaults */
767 pf->deblock_v_luma = deblock_v_luma_c;
768 pf->deblock_h_luma = deblock_h_luma_c;
769 pf->deblock_v_chroma = deblock_v_chroma_c;
770 pf->deblock_h_chroma = deblock_h_chroma_c;
771 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
772 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
773 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
774 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
/* MMXEXT: all four chroma filters */
777 if( cpu&X264_CPU_MMXEXT )
779 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
780 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
781 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
782 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
/* SSE2: inter luma filters */
785 if( cpu&X264_CPU_SSE2 )
787 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
788 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
/* MMXEXT inter luma filters */
791 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
792 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
/* AltiVec: inter luma filters */
798 if( cpu&X264_CPU_ALTIVEC )
800 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
801 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish progress on a frame: store i_lines_completed under the frame's
 * mutex and wake every thread waiting on its condition variable. */
810 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
812 pthread_mutex_lock( &frame->mutex );
813 frame->i_lines_completed = i_lines_completed;
814 pthread_cond_broadcast( &frame->cv );
815 pthread_mutex_unlock( &frame->mutex );
/* Block until frame->i_lines_completed has reached at least
 * i_lines_completed. The condition is re-checked in a loop after every
 * wake-up. */
818 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
820 pthread_mutex_lock( &frame->mutex );
821 while( frame->i_lines_completed < i_lines_completed )
822 pthread_cond_wait( &frame->cv, &frame->mutex );
823 pthread_mutex_unlock( &frame->mutex );
/* NOTE(review): second definition of x264_frame_cond_broadcast; presumably
 * the variant for builds without thread support, selected by a preprocessor
 * conditional -- confirm. */
827 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
/* NOTE(review): second definition of x264_frame_cond_wait; presumably the
 * variant for builds without thread support, selected by a preprocessor
 * conditional -- confirm. */
829 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
/* Frame lists are NULL-terminated arrays of frame pointers. The loop finds
 * the first NULL slot, i.e. the end of the list.
 * NOTE(review): presumably frame is then stored there -- confirm. */
836 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
839 while( list[i] ) i++;
/* The loop advances i to the last non-NULL entry of the NULL-terminated list.
 * NOTE(review): presumably that entry is removed and returned -- confirm. */
843 x264_frame_t *x264_frame_pop( x264_frame_t **list )
848 while( list[i+1] ) i++;
/* The loop finds the end of the NULL-terminated list.
 * NOTE(review): presumably the entries are then moved up by one so frame can
 * be placed at the front -- confirm. */
854 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
857 while( list[i] ) i++;
/* Takes the first entry of the NULL-terminated list and then walks the
 * remaining entries.
 * NOTE(review): presumably the walk moves each entry down by one and the
 * saved first entry is returned -- confirm. */
863 x264_frame_t *x264_frame_shift( x264_frame_t **list )
865 x264_frame_t *frame = list[0];
867 for( i = 0; list[i]; i++ )
/* Drop one reference to frame. When the count reaches zero the frame is
 * appended to h->frames.unused for reuse. */
873 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
875 assert( frame->i_reference_count > 0 );
876 frame->i_reference_count--;
877 if( frame->i_reference_count == 0 )
878 x264_frame_push( h->frames.unused, frame );
/* the last slot of the unused array must still be NULL, i.e. the
 * NULL-terminated list has not overflowed */
879 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Obtain a frame with a reference count of 1: reuse one from
 * h->frames.unused when the list is non-empty, otherwise allocate a new one.
 * NOTE(review): x264_frame_new can return NULL, and the result is
 * dereferenced below -- confirm how allocation failure is handled. */
882 x264_frame_t *x264_frame_pop_unused( x264_t *h )
885 if( h->frames.unused[0] )
886 frame = x264_frame_pop( h->frames.unused );
888 frame = x264_frame_new( h );
/* a frame handed out here must not be referenced by anyone else */
889 assert( frame->i_reference_count == 0 );
890 frame->i_reference_count = 1;
/* Sort a NULL-terminated frame list by comparing adjacent entries and
 * exchanging those that are out of order. With b_dts set, frames are ordered
 * by i_type first and by i_frame within equal types.
 * NOTE(review): the ordering used when b_dts is 0 is presumably by i_frame
 * alone -- confirm. */
894 void x264_frame_sort( x264_frame_t **list, int b_dts )
899 for( i = 0; list[i+1]; i++ )
/* positive differences mean the earlier entry should come later */
901 int dtype = list[i]->i_type - list[i+1]->i_type;
902 int dtime = list[i]->i_frame - list[i+1]->i_frame;
903 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
907 XCHG( x264_frame_t*, list[i], list[i+1] );