1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Allocate a new frame and size all of its buffers from the encoder state in h.
 *
 * The frame struct is zero-filled, then the following are allocated:
 *   - buffer[0..2] / plane[0..2]    : the three picture planes (planes 1 and 2
 *                                     use half the stride, width and lines),
 *   - buffer[4..6] / filtered[1..3] : three extra planes with luma dimensions,
 *   - buffer_lowres[0..3] / lowres[]: only when h->frames.b_have_lowres is set,
 *   - buffer[7] / integral          : only when the ME method is >= X264_ME_ESA,
 *   - mb_type, mv[], ref[]          : per-macroblock arrays,
 *   - i_row_bits, i_row_qp, i_row_satds[][] : per-row arrays.
 * Finally the frame's mutex and condition variable are initialised.
 *
 * Returns NULL if the x264_malloc() of the struct itself fails.
 * NOTE(review): CHECKED_MALLOC is defined outside this view; it presumably
 * transfers control to the x264_frame_delete() call at the end of the function
 * when an allocation fails -- confirm against the macro definition. */
26 x264_frame_t *x264_frame_new( x264_t *h )
28 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31 int i_mb_count = h->mb.i_mb_count;
32 int i_stride, i_width, i_lines;
/* vertical padding is PADV, doubled when b_interlaced is 1 */
33 int i_padv = PADV << h->param.b_interlaced;
35 if( !frame ) return NULL;
/* start from an all-zero struct so every field has a defined value */
37 memset( frame, 0, sizeof(x264_frame_t) );
39 /* allocate frame data (+64 for extra data for me) */
/* width and height are rounded up to a multiple of 16 */
40 i_width = ( ( h->param.i_width + 15 ) & -16 );
/* the stride adds PADH bytes of padding on each side of a row */
41 i_stride = i_width + 2*PADH;
42 i_lines = ( ( h->param.i_height + 15 ) & -16 );
43 if( h->param.b_interlaced )
44 i_lines = ( i_lines + 31 ) & -32;
/* with X264_CPU_CACHELINE_SPLIT set, round the stride up to a multiple of
 * 32 (X264_CPU_CACHELINE_32) or 64 bytes */
46 if( h->param.cpu&X264_CPU_CACHELINE_SPLIT )
48 int align = h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 64;
49 i_stride = (i_stride + align-1) & -align;
/* `>> !!i` leaves plane 0 at full size and halves every value for planes 1 and 2 */
53 for( i = 0; i < 3; i++ )
55 frame->i_stride[i] = i_stride >> !!i;
56 frame->i_width[i] = i_width >> !!i;
57 frame->i_lines[i] = i_lines >> !!i;
/* the buffer holds the plane rows plus i_padv padding rows above and below */
58 CHECKED_MALLOC( frame->buffer[i],
59 frame->i_stride[i] * (i_lines + 2*i_padv) >> !!i );
/* plane[] points inside the buffer, past the top padding rows and the left padding */
60 frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
61 ((frame->i_stride[i] * i_padv + PADH) >> !!i);
/* filtered[0] aliases plane 0; filtered[1..3] get their own buffers 4..6,
 * sized and offset like plane 0 */
64 frame->filtered[0] = frame->plane[0];
65 for( i = 0; i < 3; i++ )
67 CHECKED_MALLOC( frame->buffer[4+i],
68 frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
69 frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
70 frame->i_stride[0] * i_padv + PADH;
/* optional half-width, half-height planes: four of them, each with PADH
 * horizontal padding and i_padv padding rows */
73 if( h->frames.b_have_lowres )
75 frame->i_width_lowres = frame->i_width[0]/2;
76 frame->i_stride_lowres = frame->i_width_lowres + 2*PADH;
77 frame->i_lines_lowres = frame->i_lines[0]/2;
78 for( i = 0; i < 4; i++ )
80 CHECKED_MALLOC( frame->buffer_lowres[i],
81 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
82 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
83 frame->i_stride_lowres * i_padv + PADH;
/* buffer[7] holds 2 * stride * padded-lines uint16_t entries; `integral`
 * points past the top padding rows and the left padding */
87 if( h->param.analyse.i_me_method >= X264_ME_ESA )
89 CHECKED_MALLOC( frame->buffer[7],
90 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
91 frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
95 frame->i_type = X264_TYPE_AUTO;
99 frame->i_frame_num = -1;
/* -1 is below any non-negative line count passed to x264_frame_cond_wait() */
100 frame->i_lines_completed = -1;
/* per macroblock: 1 type byte, 2*16 int16_t of mv data and 4 ref bytes */
102 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
103 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
104 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
/* the second mv/ref arrays are only allocated when h->param.i_bframe is non-zero */
105 if( h->param.i_bframe )
107 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
108 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
113 frame->ref[1] = NULL;
/* per-row arrays hold one int per 16 lines */
116 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
117 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
/* only the (i_bframe+2) x (i_bframe+2) corner of i_row_satds is allocated */
118 for( i = 0; i < h->param.i_bframe + 2; i++ )
119 for( j = 0; j < h->param.i_bframe + 2; j++ )
120 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
122 x264_pthread_mutex_init( &frame->mutex, NULL );
123 x264_pthread_cond_init( &frame->cv, NULL );
/* NOTE(review): presumably the allocation-failure path. x264_frame_delete()
 * also destroys frame->mutex and frame->cv, which are only initialised just
 * above, after every allocation -- confirm that destroying them on a frame
 * that was merely zero-filled is acceptable on all thread implementations. */
128 x264_frame_delete( frame );
/* Release everything owned by `frame`.
 *
 * Frees all 8 buffer[] slots, the 4 lowres buffers, the full
 * (X264_BFRAME_MAX+2) x (X264_BFRAME_MAX+2) i_row_satds table, the per-row
 * and per-macroblock arrays, then destroys the mutex and condition variable.
 * Slots that x264_frame_new() never allocated are still passed to x264_free();
 * NOTE(review): this relies on x264_free() accepting a null pointer -- confirm.
 * NOTE(review): no release of the frame struct itself is visible in this
 * listing -- confirm against the complete source. */
132 void x264_frame_delete( x264_frame_t *frame )
135 for( i = 0; i < 8; i++ )
136 x264_free( frame->buffer[i] );
137 for( i = 0; i < 4; i++ )
138 x264_free( frame->buffer_lowres[i] );
/* iterates over the whole table, not just the i_bframe+2 corner that was allocated */
139 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
140 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
141 x264_free( frame->i_row_satds[i][j] );
142 x264_free( frame->i_row_bits );
143 x264_free( frame->i_row_qp );
144 x264_free( frame->mb_type );
145 x264_free( frame->mv[0] );
146 x264_free( frame->mv[1] );
147 x264_free( frame->ref[0] );
148 x264_free( frame->ref[1] );
149 x264_pthread_mutex_destroy( &frame->mutex );
150 x264_pthread_cond_destroy( &frame->cv );
/* Copy the input picture `src` into the frame `dst`.
 *
 * Only X264_CSP_I420 and X264_CSP_YV12 are accepted; any other colourspace is
 * logged as an error. Copies i_type, i_qpplus1 and i_pts, then copies each
 * plane with h->mc.plane_copy using the dimensions from h->param.
 * NOTE(review): the return statements and the loop over planes are not
 * visible in this listing. */
154 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
/* strip flag bits (such as X264_CSP_VFLIP, tested below) from the colourspace id */
156 int i_csp = src->img.i_csp & X264_CSP_MASK;
158 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
160 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
164 dst->i_type = src->i_type;
165 dst->i_qpplus1 = src->i_qpplus1;
166 dst->i_pts = src->i_pts;
/* for YV12 the source planes 1 and 2 are swapped: i^3 maps 1 -> 2 and 2 -> 1 */
170 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
171 uint8_t *plane = src->img.plane[s];
172 int stride = src->img.i_stride[s];
/* planes 1 and 2 are copied at half width and half height */
173 int width = h->param.i_width >> !!i;
174 int height = h->param.i_height >> !!i;
/* vertical flip: start reading from the last source row
 * (NOTE(review): the stride is presumably negated on an omitted line -- confirm) */
175 if( src->img.i_csp & X264_CSP_VFLIP )
177 plane += (height-1)*stride;
180 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate the edge pixels of a plane region into its surrounding padding.
 *
 * pix      : top-left pixel of the region
 * i_stride : distance in bytes between consecutive rows
 * i_width, i_height : region size in pixels / rows
 * i_padh, i_padv    : amount of horizontal / vertical padding to fill
 *
 * Each row gets its first pixel repeated i_padh times to the left and its last
 * pixel repeated i_padh times to the right. The first row (including its new
 * horizontal padding) is then copied into the i_padv rows above, and the last
 * row into the i_padv rows below.
 * NOTE(review): b_pad_top / b_pad_bottom presumably enable the two vertical
 * loops; the guarding lines are not visible in this listing. */
187 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
/* address of pixel (x, y) relative to pix; negative coordinates reach into the padding */
189 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
191 for( y = 0; y < i_height; y++ )
/* left padding: repeat the first pixel of the row */
194 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right padding: repeat the last pixel of the row */
196 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* rows above the region: copies of row 0, padding included */
200 for( y = 0; y < i_padv; y++ )
201 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* rows below the region: copies of the last row, padding included */
204 for( y = 0; y < i_padv; y++ )
205 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Expand the borders of every plane of `frame` for the macroblock row mb_y.
 *
 * When b_end is set the region extends from mb_y to the bottom of the picture,
 * otherwise it is 16 luma rows high. The start pointer is moved 4 luma rows
 * (shifted for planes 1/2) above the row, clamped to the top of the plane.
 * NOTE(review): `b_start` is defined on an omitted line (presumably true for
 * the first row), and the choice between the two-field and the single call to
 * plane_expand_border() is also on omitted lines -- confirm. */
209 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
/* with MBAFF, odd rows are tested here (the action taken is on an omitted line) */
213 if( mb_y & h->sh.b_mbaff )
215 for( i = 0; i < frame->i_plane; i++ )
217 int stride = frame->i_stride[i];
/* planes 1 and 2 use half of every dimension (>> !!i) */
218 int width = 16*h->sps->i_mb_width >> !!i;
219 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
220 int padh = PADH >> !!i;
221 int padv = PADV >> !!i;
222 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
223 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
/* the last call also covers the rows the start pointer was moved up by */
224 if( b_end && !b_start )
225 height += 4 >> (!!i + h->sh.b_mbaff);
/* two-field variant: each field is treated as a plane with doubled stride */
228 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
229 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single-call variant */
233 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of the three filtered planes (frame->filtered[1..3]) for
 * the macroblock row mb_y.
 *
 * The region is 16 pixels wider than the picture and, when b_end is set,
 * 16 rows taller than the remaining picture height; the start pointer is moved
 * 8 columns left and 8 rows up (16 rows with MBAFF) to match.
 * NOTE(review): padh, padv and b_start are defined on omitted lines, as is the
 * choice between the two-field and single-call variants -- confirm. */
238 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
240 /* during filtering, 8 extra pixels were filtered on each edge.
241 we want to expand border from the last filtered pixel */
243 int stride = frame->i_stride[0];
244 int width = 16*h->sps->i_mb_width + 16;
245 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
/* filtered[0] is skipped: x264_frame_new() makes it an alias of plane[0] */
249 for( i = 1; i < 4; i++ )
251 // buffer: 8 luma, to match the hpel filter
252 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
/* two-field variant: each field is treated as a plane with doubled stride */
255 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
256 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single-call variant */
260 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Expand the borders of all four lowres planes of `frame`.
 * The width is recovered as i_stride_lowres - 2*PADH; padding is PADH
 * horizontally and PADV vertically, with both top and bottom padding enabled. */
265 void x264_frame_expand_border_lowres( x264_frame_t *frame )
268 for( i = 0; i < 4; i++ )
269 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad each plane from the configured picture size (h->param.i_width x
 * h->param.i_height) out to the macroblock-aligned size (16 * i_mb_width x
 * 16 * i_mb_height).
 *
 * Columns to the right are filled with the last pixel of each row; rows below
 * are filled with copies of the last picture row.
 * NOTE(review): the size arguments of the memset/memcpy calls and any guards
 * on i_padx / i_pady are on omitted lines -- confirm. */
272 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
275 for( i = 0; i < frame->i_plane; i++ )
/* planes other than 0 are subsampled by 2 in both directions */
277 int i_subsample = i ? 1 : 0;
278 int i_width = h->param.i_width >> i_subsample;
279 int i_height = h->param.i_height >> i_subsample;
/* number of missing columns / rows up to the macroblock-aligned size */
280 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
281 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* right edge: repeat the last pixel of every picture row */
285 for( y = 0; y < i_height; y++ )
286 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
287 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
292 //FIXME interlace? or just let it pad using the wrong field
/* bottom edge: copy the last picture row into each missing row */
293 for( y = i_height; y < i_height + i_pady; y++ )
294 memcpy( &frame->plane[i][y*frame->i_stride[i]],
295 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
302 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
303 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* Back up and rewrite the non-zero-count data of macroblock row mb_y.
 *
 * For every macroblock in the row, the first 16 bytes of its non_zero_count
 * entry are saved into buf[x]; each of the four 32-bit words is then replaced
 * by 0x01010101 when it is non-zero, i.e. four consecutive counts become 1 as
 * soon as any of them is non-zero.
 * NOTE(review): `transform` is not used on any visible line; the rewrite is
 * presumably limited to macroblocks using the 8x8 transform -- confirm.
 * NOTE(review): the uint8_t count array is accessed through a uint32_t
 * pointer cast, which depends on the compiler tolerating this aliasing. */
304 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
/* view each macroblock's count entry as 6 words of 32 bits */
306 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
307 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
309 for( x=0; x<h->sps->i_mb_width; x++ )
/* save the original 16 counts so restore_cavlc_nnz_row() can put them back */
311 memcpy( buf+x, src+x, 16 );
314 if( src[x][0] ) src[x][0] = 0x01010101;
315 if( src[x][1] ) src[x][1] = 0x01010101;
316 if( src[x][2] ) src[x][2] = 0x01010101;
317 if( src[x][3] ) src[x][3] = 0x01010101;
/* Undo munge_cavlc_nnz_row() for macroblock row mb_y: copy the 16 saved bytes
 * in buf[x] back into the start of each macroblock's non_zero_count entry. */
322 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
324 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
326 for( x=0; x<h->sps->i_mb_width; x++ )
327 memcpy( dst+x, buf+x, 16 );
/* Apply `func` (munge_cavlc_nnz_row or restore_cavlc_nnz_row) to row mb_y and
 * to neighbouring rows, giving each row its own i_mb_width-sized slice of buf:
 * slice 0 for mb_y, slice 1 for mb_y-1, slice 2 for mb_y+1, slice 3 for mb_y-2.
 * NOTE(review): the conditions selecting which neighbouring rows are
 * processed are on omitted lines -- confirm. */
330 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
332 func( h, mb_y, buf );
334 func( h, mb_y-1, buf + h->sps->i_mb_width );
337 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
339 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
344 /* Deblocking filter */
/* Alpha threshold of the deblocking filter. deblock_edge() indexes it with
 * qp + h->sh.i_alpha_c0_offset clipped to 0..51; the filters only modify an
 * edge when abs( p0 - q0 ) is below this value.
 * NOTE(review): 50 of the 52 declared entries are visible in this listing. */
346 static const int i_alpha_table[52] =
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
350 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
351 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
352 80, 90,101,113,127,144,162,182,203,226,
/* Beta threshold of the deblocking filter. deblock_edge() indexes it with
 * qp + h->sh.i_beta_offset clipped to 0..51; the filters compare
 * abs( p1 - p0 ), abs( q1 - q0 ), abs( p2 - p0 ) and abs( q2 - q0 ) against it.
 * NOTE(review): 50 of the 52 declared entries are visible in this listing. */
355 static const int i_beta_table[52] =
357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
359 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
360 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
361 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* Clipping limit of the non-intra deblocking filters. deblock_edge() reads
 * i_tc0_table[index_a][bS - 1]: the row is the same clipped index used for
 * i_alpha_table, the column is the boundary strength minus one. */
364 static const int i_tc0_table[52][3] =
366 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
367 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
368 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
369 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
370 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
371 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
372 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
373 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
374 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* Helper applied by deblock_luma_c() and deblock_chroma_c() to p0 + delta and
 * q0 - delta before the result is stored into a uint8_t pixel.
 * NOTE(review): only the signature is visible in this listing; the name
 * suggests the value is clamped to the 0..255 range -- confirm against the body. */
378 static inline int clip_uint8( int a )
/* Reference C implementation of the non-intra luma deblocking filter.
 *
 * pix     : first pixel on the q side of the edge (q0)
 * xstride : distance between successive taps across the edge (p2 p1 p0 | q0 q1 q2)
 * ystride : NOTE(review): presumably the step between successive positions
 *           along the edge; the pointer advance is on an omitted line
 * alpha, beta : activity thresholds; a position is filtered only when
 *           abs(p0-q0) < alpha, abs(p1-p0) < beta and abs(q1-q0) < beta
 * tc0     : four clipping limits, one per group of four positions
 *
 * The loops cover 4 groups of 4 positions. p1 / q1 are additionally smoothed
 * when abs(p2-p0) / abs(q2-q0) is below beta, with the change limited to
 * +/- tc0[i]. p0 and q0 are moved towards each other by `delta`, which is
 * limited to +/- tc.
 * NOTE(review): the declaration and adjustment of `tc`, and any early skip of
 * a group, are on omitted lines -- confirm. */
386 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
389 for( i = 0; i < 4; i++ ) {
394 for( d = 0; d < 4; d++ ) {
395 const int p2 = pix[-3*xstride];
396 const int p1 = pix[-2*xstride];
397 const int p0 = pix[-1*xstride];
398 const int q0 = pix[ 0*xstride];
399 const int q1 = pix[ 1*xstride];
400 const int q2 = pix[ 2*xstride];
/* only filter where the step across the edge is small enough */
402 if( abs( p0 - q0 ) < alpha &&
403 abs( p1 - p0 ) < beta &&
404 abs( q1 - q0 ) < beta ) {
/* the p side is smooth: also adjust p1, by at most tc0[i] */
409 if( abs( p2 - p0 ) < beta ) {
410 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
/* the q side is smooth: also adjust q1, by at most tc0[i] */
413 if( abs( q2 - q0 ) < beta ) {
414 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* correction applied with opposite signs to p0 and q0 */
418 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
419 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
420 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Non-intra luma filter with taps spaced `stride` bytes apart (xstride = stride)
 * and ystride = 1; forwards to deblock_luma_c(). */
426 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
428 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Non-intra luma filter with taps on adjacent bytes (xstride = 1) and
 * ystride = stride; forwards to deblock_luma_c(). */
430 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
432 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Reference C implementation of the non-intra chroma deblocking filter.
 *
 * Same parameter meaning as deblock_luma_c(), but only two taps per side are
 * read (p1 p0 | q0 q1), only p0 and q0 are modified, and each of the four
 * tc0 entries covers two positions.
 * NOTE(review): the pointer advance along the edge and any early skip of a
 * group are on omitted lines -- confirm. */
435 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
438 for( i = 0; i < 4; i++ ) {
439 const int tc = tc0[i];
444 for( d = 0; d < 2; d++ ) {
445 const int p1 = pix[-2*xstride];
446 const int p0 = pix[-1*xstride];
447 const int q0 = pix[ 0*xstride];
448 const int q1 = pix[ 1*xstride];
/* only filter where the step across the edge is small enough */
450 if( abs( p0 - q0 ) < alpha &&
451 abs( p1 - p0 ) < beta &&
452 abs( q1 - q0 ) < beta ) {
/* correction limited to +/- tc, applied with opposite signs to p0 and q0 */
454 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
455 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
456 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Non-intra chroma filter with taps spaced `stride` bytes apart
 * (xstride = stride) and ystride = 1; forwards to deblock_chroma_c(). */
462 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
464 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* Non-intra chroma filter with taps on adjacent bytes (xstride = 1) and
 * ystride = stride; forwards to deblock_chroma_c(). */
466 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
468 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Reference C implementation of the intra (strong) luma deblocking filter.
 *
 * Processes 16 positions along the edge. A position is filtered when
 * abs(p0-q0) < alpha, abs(p1-p0) < beta and abs(q1-q0) < beta. When
 * abs(p0-q0) is additionally below (alpha >> 2) + 2, each side whose
 * abs(p2-p0) / abs(q2-q0) is below beta has three pixels rewritten from a
 * wider average (reading p3 / q3); the remaining assignments rewrite only
 * p0 / q0 from a 3-tap average.
 * NOTE(review): the `else` lines that pair the strong and weak assignments,
 * and the pointer advance along the edge, are on omitted lines -- confirm. */
471 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
474 for( d = 0; d < 16; d++ ) {
475 const int p2 = pix[-3*xstride];
476 const int p1 = pix[-2*xstride];
477 const int p0 = pix[-1*xstride];
478 const int q0 = pix[ 0*xstride];
479 const int q1 = pix[ 1*xstride];
480 const int q2 = pix[ 2*xstride];
482 if( abs( p0 - q0 ) < alpha &&
483 abs( p1 - p0 ) < beta &&
484 abs( q1 - q0 ) < beta ) {
/* very small step across the edge: the strong variants become eligible */
486 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
487 if( abs( p2 - p0 ) < beta)
489 const int p3 = pix[-4*xstride];
/* strong filtering of the p side: p0, p1 and p2 are rewritten */
491 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
492 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
493 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* weak filtering of the p side: only p0 is rewritten */
496 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
498 if( abs( q2 - q0 ) < beta)
500 const int q3 = pix[3*xstride];
/* strong filtering of the q side: q0, q1 and q2 are rewritten */
502 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
503 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
504 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* weak filtering of the q side: only q0 is rewritten */
507 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak filtering of both p0 and q0 */
511 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
512 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Intra luma filter with taps spaced `stride` bytes apart (xstride = stride)
 * and ystride = 1; forwards to deblock_luma_intra_c(). */
518 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
520 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* Intra luma filter with taps on adjacent bytes (xstride = 1) and
 * ystride = stride; forwards to deblock_luma_intra_c(). */
522 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
524 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Reference C implementation of the intra chroma deblocking filter.
 *
 * Processes 8 positions along the edge. Where abs(p0-q0) < alpha,
 * abs(p1-p0) < beta and abs(q1-q0) < beta, p0 and q0 are each replaced by a
 * rounded 3-tap average; no clipping limit is involved.
 * NOTE(review): the pointer advance along the edge is on an omitted line. */
527 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
530 for( d = 0; d < 8; d++ ) {
531 const int p1 = pix[-2*xstride];
532 const int p0 = pix[-1*xstride];
533 const int q0 = pix[ 0*xstride];
534 const int q1 = pix[ 1*xstride];
536 if( abs( p0 - q0 ) < alpha &&
537 abs( p1 - p0 ) < beta &&
538 abs( q1 - q0 ) < beta ) {
540 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
541 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Intra chroma filter with taps spaced `stride` bytes apart (xstride = stride)
 * and ystride = 1; forwards to deblock_chroma_intra_c(). */
547 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
549 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* Intra chroma filter with taps on adjacent bytes (xstride = 1) and
 * ystride = stride; forwards to deblock_chroma_intra_c(). */
551 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
553 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one edge with the given boundary strengths.
 *
 * Derives alpha and beta from i_qp plus the slice offsets (each index clipped
 * to 0..51), then calls either pf_inter (with a per-segment tc array) or
 * pf_intra.
 * bS       : boundary strength of each of the four edge segments
 * b_chroma : added to every tc value (callers pass 0 for luma, 1 for chroma)
 * NOTE(review): the test that chooses between pf_inter and pf_intra, and the
 * declaration of `tc`, are on omitted lines -- confirm. */
556 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
557 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
560 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
561 const int alpha = i_alpha_table[index_a];
562 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
/* a segment with bS == 0 starts from -1 instead of a table value */
567 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
568 pf_inter( pix, i_stride, alpha, beta, tc );
570 pf_intra( pix, i_stride, alpha, beta );
/* Run the deblocking filter over macroblock row mb_y of the frame h->fdec.
 *
 * For each macroblock the edges are visited in two passes (i_dir 0, then 1).
 * For every edge a boundary strength bS[0..3] is derived from the macroblock
 * types, the non-zero coefficient counts and the reference / motion vector
 * differences, and the luma and both chroma planes are filtered through
 * deblock_edge() with the function pointers in h->loopf.
 * With h->sh.b_mbaff set, plane strides are doubled and mb_y is toggled
 * between two rows after every macroblock.
 * NOTE(review): many lines of this function (declarations, bS assignments,
 * `continue` / `else` statements, the mb_x increment) are omitted from this
 * listing; the comments below describe only the visible statements. */
574 void x264_frame_deblock_row( x264_t *h, int mb_y )
/* strides of the per-8x8 and per-4x4 arrays, in units of their own elements */
576 const int s8x8 = 2 * h->mb.i_mb_stride;
577 const int s4x4 = 4 * h->mb.i_mb_stride;
578 const int b_interlaced = h->sh.b_mbaff;
/* vertical mv difference threshold: 4, halved to 2 when interlaced */
579 const int mvy_limit = 4 >> b_interlaced;
582 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
583 h->fdec->i_stride[1] << b_interlaced,
584 h->fdec->i_stride[2] << b_interlaced };
/* without CABAC and with the 8x8 transform enabled, temporarily rewrite the
 * non-zero counts (see the comment above munge_cavlc_nnz_row); restored at the end */
586 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
587 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
589 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
591 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
592 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
593 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
594 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP macroblocks only have their first edge examined */
595 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
/* offset of the macroblock's top-left pixel within each plane */
598 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
599 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
600 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* odd row in interlaced mode: step back 15 (7) rows, i.e. to the row right
 * after the top of the previous macroblock row */
601 if( b_interlaced && (mb_y&1) )
603 i_pix_y[0] -= 15*h->fdec->i_stride[0];
604 i_pix_y[1] -= 7*h->fdec->i_stride[1];
605 i_pix_y[2] -= 7*h->fdec->i_stride[2];
608 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
610 /* i_dir == 0 -> vertical edge
611 * i_dir == 1 -> horizontal edge */
612 for( i_dir = 0; i_dir < 2; i_dir++ )
/* start at edge 1 when edge 0 lies on the left (mb_x == 0) or top
 * (mb_y <= b_interlaced) boundary */
614 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
617 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
619 int mbn_xy, mbn_8x8, mbn_4x4;
620 int bS[4]; /* filtering strength */
/* odd edges of a macroblock using the 8x8 transform are special-cased
 * (the action is on an omitted line) */
622 if( b_8x8_transform && (i_edge&1) )
/* neighbour indices: the macroblock itself for inner edges, the left
 * (i_dir 0) or upper (i_dir 1) macroblock for edge 0 */
625 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
626 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
627 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
/* interlaced, top edge: the neighbour is one further macroblock row up */
629 if( b_interlaced && i_edge == 0 && i_dir == 1 )
631 mbn_xy -= h->mb.i_mb_stride;
636 /* *** Get bS for each 4px for the current edge *** */
637 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
/* intra on either side: 4 on a macroblock edge (except the interlaced
 * horizontal one), otherwise 3 */
639 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
644 for( i = 0; i < 4; i++ )
/* (x, y): 4x4 block on this side of the edge; (xn, yn): block on the other
 * side, wrapped with & 3 into the neighbouring macroblock for edge 0 */
646 int x = i_dir == 0 ? i_edge : i;
647 int y = i_dir == 0 ? i : i_edge;
648 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
649 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
/* non-zero coefficients in either block */
651 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
652 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
658 /* FIXME: A given frame may occupy more than one position in
659 * the reference list. So we should compare the frame numbers,
660 * not the indices in the ref list.
661 * No harm yet, as we don't generate that case.*/
/* indices of both blocks in the per-8x8 (ref) and per-4x4 (mv) arrays */
663 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
664 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
665 int i4p= mb_4x4+x+y*s4x4;
666 int i4q= mbn_4x4+xn+yn*s4x4;
/* list 0 only, or lists 0 and 1 in B slices */
671 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
/* different reference, or mv difference of at least 4 horizontally or
 * mvy_limit vertically */
673 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
674 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
675 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
/* the edge is filtered with the rounded average of both macroblocks' qp */
687 i_qp = h->mb.qp[mb_xy];
688 i_qpn= h->mb.qp[mbn_xy];
/* i_dir 0: luma edge at column 4*i_edge, filtered with the h_ functions */
693 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
694 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
695 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
/* chroma qp: both macroblock qps are offset, clipped and mapped through
 * i_chroma_qp_table, then averaged; the chroma edge is at column 2*i_edge */
699 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
700 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
701 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
702 i_stride2[1], bS, i_qpc, 1,
703 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
704 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
705 i_stride2[2], bS, i_qpc, 1,
706 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
711 /* horizontal edge */
712 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
713 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
714 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
718 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
719 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
720 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
721 i_stride2[1], bS, i_qpc, 1,
722 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
723 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
724 i_stride2[2], bS, i_qpc, 1,
725 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
/* progressive, or second row of an interlaced pair (the statement executed
 * under this test is on an omitted line) */
732 if( !b_interlaced || (mb_y&1) )
/* interlaced: alternate between the two rows of the pair */
734 mb_y ^= b_interlaced;
/* put back the non-zero counts saved at the top of the function */
737 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
738 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
/* Deblock the whole frame by calling x264_frame_deblock_row() for each
 * macroblock row; the row index advances by 2 when h->sh.b_mbaff is set. */
741 void x264_frame_deblock( x264_t *h )
744 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
745 x264_frame_deblock_row( h, mb_y );
/* Prototypes of deblocking routines implemented outside this file; they are
 * installed into the function table by x264_deblock_init() when the matching
 * CPU flag is set.
 * NOTE(review): any preprocessor guards around these declarations are not
 * visible in this listing. */
749 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
750 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
751 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
752 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
754 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
755 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
757 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* half-edge helper; x264_deblock_v_luma_mmxext() calls it twice */
758 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Full-edge luma filter built from two calls of the v8 routine: the first
 * starts at pix with tc0[0..], the second starts 8 bytes further with tc0[2..]. */
760 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
762 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
763 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* Prototypes of the AltiVec luma deblocking routines, implemented outside
 * this file and installed by x264_deblock_init() when X264_CPU_ALTIVEC is set. */
769 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
770 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblocking function table `pf`.
 *
 * All eight entries are first set to the C reference implementations of this
 * file; entries are then overridden according to the flags in `cpu`.
 * NOTE(review): the preprocessor guards and the nesting of the CPU tests are
 * on omitted lines, so the exact conditions under which each override is
 * compiled in cannot be read from this listing. */
773 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
775 pf->deblock_v_luma = deblock_v_luma_c;
776 pf->deblock_h_luma = deblock_h_luma_c;
777 pf->deblock_v_chroma = deblock_v_chroma_c;
778 pf->deblock_h_chroma = deblock_h_chroma_c;
779 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
780 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
781 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
782 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
/* MMXEXT: all four chroma entries and the two non-intra luma entries */
785 if( cpu&X264_CPU_MMXEXT )
787 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
788 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
789 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
790 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
792 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
793 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
/* SSE2: replaces the non-intra luma entries again */
795 if( cpu&X264_CPU_SSE2 )
797 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
798 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
/* AltiVec: non-intra luma entries only */
804 if( cpu&X264_CPU_ALTIVEC )
806 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
807 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish progress on `frame`: under frame->mutex, store i_lines_completed
 * and wake every thread waiting on frame->cv (see x264_frame_cond_wait). */
816 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
818 x264_pthread_mutex_lock( &frame->mutex );
819 frame->i_lines_completed = i_lines_completed;
820 x264_pthread_cond_broadcast( &frame->cv );
821 x264_pthread_mutex_unlock( &frame->mutex );
/* Block until frame->i_lines_completed is at least i_lines_completed.
 * The condition is re-tested in a loop after every wake-up, with frame->mutex
 * held around the test. */
824 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
826 x264_pthread_mutex_lock( &frame->mutex );
827 while( frame->i_lines_completed < i_lines_completed )
828 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
829 x264_pthread_mutex_unlock( &frame->mutex );
/* Second definition of x264_frame_cond_broadcast with the same signature.
 * NOTE(review): presumably the alternative branch of a preprocessor
 * conditional (build without threads); neither the guard nor the body is
 * visible in this listing -- confirm. */
833 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
/* Second definition of x264_frame_cond_wait with the same signature.
 * NOTE(review): presumably the alternative branch of a preprocessor
 * conditional (build without threads); neither the guard nor the body is
 * visible in this listing -- confirm. */
835 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
/* Append `frame` to the NULL-terminated array `list`.
 * The visible loop advances i to the first NULL slot.
 * NOTE(review): the store into that slot is on an omitted line; no capacity
 * check is visible -- confirm the caller guarantees a free slot. */
842 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
845 while( list[i] ) i++;
/* Remove and return an entry from the NULL-terminated array `list`.
 * The visible loop advances i to the last non-NULL entry.
 * NOTE(review): the removal, the return statement and any check for an empty
 * list are on omitted lines -- confirm. */
849 x264_frame_t *x264_frame_pop( x264_frame_t **list )
854 while( list[i+1] ) i++;
/* Insert `frame` into the NULL-terminated array `list`.
 * The visible loop advances i to the first NULL slot.
 * NOTE(review): the name suggests insertion at the front, with the existing
 * entries moved up by one on omitted lines -- confirm. */
860 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
863 while( list[i] ) i++;
/* Take the first entry of the NULL-terminated array `list`.
 * list[0] is read into `frame`, then the loop walks the array up to its
 * terminator.
 * NOTE(review): the loop body (presumably moving each entry down by one) and
 * the return statement are on omitted lines -- confirm. */
869 x264_frame_t *x264_frame_shift( x264_frame_t **list )
871 x264_frame_t *frame = list[0];
873 for( i = 0; list[i]; i++ )
/* Drop one reference to `frame`; when the count reaches zero the frame is
 * appended to h->frames.unused for later reuse by x264_frame_pop_unused().
 * The caller must hold at least one reference (asserted). */
879 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
881 assert( frame->i_reference_count > 0 );
882 frame->i_reference_count--;
883 if( frame->i_reference_count == 0 )
884 x264_frame_push( h->frames.unused, frame );
/* the last slot of the array must stay NULL so the list remains terminated */
885 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
888 x264_frame_t *x264_frame_pop_unused( x264_t *h )
891 if( h->frames.unused[0] )
892 frame = x264_frame_pop( h->frames.unused );
894 frame = x264_frame_new( h );
895 assert( frame->i_reference_count == 0 );
896 frame->i_reference_count = 1;
/* Sort the NULL-terminated array `list` by swapping adjacent entries.
 *
 * With b_dts set, two neighbours are swapped when the first has the larger
 * i_type, or the same i_type and the larger i_frame.
 * NOTE(review): the ordering used when b_dts is 0, the test of `swap` and the
 * outer repeat-until-sorted loop are on omitted lines -- confirm. */
900 void x264_frame_sort( x264_frame_t **list, int b_dts )
905 for( i = 0; list[i+1]; i++ )
907 int dtype = list[i]->i_type - list[i+1]->i_type;
908 int dtime = list[i]->i_frame - list[i+1]->i_frame;
909 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
913 XCHG( x264_frame_t*, list[i], list[i+1] );