1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Allocate a new x264_frame_t sized for the encoder's configured resolution.
 * Dimensions are rounded up to macroblock multiples; every plane carries
 * PADH/PADV border padding (vertical padding doubled for interlaced so each
 * field keeps a full border).  Returns NULL on any allocation failure — the
 * CHECKED_MALLOC macro presumably jumps to a cleanup label that calls
 * x264_frame_delete() (label not visible in this listing — confirm).
 * NOTE(review): this listing elides many structural lines (braces, the
 * declarations of i/j/luma_plane_size, the fail: label). */
26 x264_frame_t *x264_frame_new( x264_t *h )
28 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31 int i_mb_count = h->mb.i_mb_count;
32 int i_stride, i_width, i_lines;
/* interlaced needs twice the vertical padding: borders are per-field */
33 int i_padv = PADV << h->param.b_interlaced;
36 if( !frame ) return NULL;
38 memset( frame, 0, sizeof(x264_frame_t) );
40 /* allocate frame data (+64 for extra data for me) */
/* round width/height up to multiples of 16 (32 vertically when interlaced,
 * so both fields stay mb-aligned) */
41 i_width = ( ( h->param.i_width + 15 ) & -16 );
42 i_stride = i_width + 2*PADH;
43 i_lines = ( ( h->param.i_height + 15 ) & -16 );
44 if( h->param.b_interlaced )
45 i_lines = ( i_lines + 31 ) & -32;
/* when cacheline-split MC is enabled, align the stride so rows start on a
 * cacheline boundary (32 or 64 bytes depending on CPU) */
47 if( h->param.cpu&X264_CPU_CACHELINE_SPLIT )
49 int align = h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 64;
50 i_stride = (i_stride + align-1) & -align;
/* plane 0 is luma; planes 1,2 are chroma at half resolution (>> !!i) */
54 for( i = 0; i < 3; i++ )
56 frame->i_stride[i] = i_stride >> !!i;
57 frame->i_width[i] = i_width >> !!i;
58 frame->i_lines[i] = i_lines >> !!i;
61 luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
/* chroma planes: each is a quarter of the padded luma plane size (4:2:0) */
62 for( i = 1; i < 3; i++ )
64 CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
65 frame->plane[i] = (uint8_t*)frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
67 /* all 4 luma planes allocated together, since the cacheline split code
68 * requires them to be in-phase wrt cacheline alignment. */
69 CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
/* filtered[0] doubles as the reconstructed luma plane; [1..3] hold the
 * half-pel interpolated planes */
70 for( i = 0; i < 4; i++ )
71 frame->filtered[i] = (uint8_t*)frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
72 frame->plane[0] = frame->filtered[0];
/* optional half-resolution planes used by lookahead/lowres analysis */
74 if( h->frames.b_have_lowres )
76 frame->i_width_lowres = frame->i_width[0]/2;
77 frame->i_stride_lowres = frame->i_width_lowres + 2*PADH;
78 frame->i_lines_lowres = frame->i_lines[0]/2;
79 for( i = 0; i < 4; i++ )
81 CHECKED_MALLOC( frame->buffer_lowres[i],
82 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
83 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
84 frame->i_stride_lowres * i_padv + PADH;
/* exhaustive/SATD motion search needs a summed-area (integral) image */
88 if( h->param.analyse.i_me_method >= X264_ME_ESA )
90 CHECKED_MALLOC( frame->buffer[3],
91 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
92 frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
96 frame->i_type = X264_TYPE_AUTO;
100 frame->i_frame_num = -1;
/* -1: no rows encoded yet (used by the threaded cond_wait/broadcast pair) */
101 frame->i_lines_completed = -1;
/* per-macroblock metadata: mb types, motion vectors and reference indices;
 * list-1 mv/ref only allocated when B-frames are enabled */
103 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
104 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
105 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
106 if( h->param.i_bframe )
108 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
109 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
114 frame->ref[1] = NULL;
/* per-mb-row ratecontrol stats; satds indexed by [src type][dst type] */
117 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
118 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
119 for( i = 0; i < h->param.i_bframe + 2; i++ )
120 for( j = 0; j < h->param.i_bframe + 2; j++ )
121 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
123 x264_pthread_mutex_init( &frame->mutex, NULL );
124 x264_pthread_cond_init( &frame->cv, NULL );
/* error path (reached via CHECKED_MALLOC failure): release partial frame */
129 x264_frame_delete( frame );
/* Free every buffer owned by a frame, then the frame struct itself (the
 * final free of `frame` is elided from this listing — confirm).  Safe on a
 * partially-constructed frame since x264_frame_new zero-fills first and
 * x264_free presumably tolerates NULL. */
133 void x264_frame_delete( x264_frame_t *frame )
/* buffer[0..2]: plane data; buffer[3]: optional integral image */
136 for( i = 0; i < 4; i++ )
137 x264_free( frame->buffer[i] );
138 for( i = 0; i < 4; i++ )
139 x264_free( frame->buffer_lowres[i] );
/* satds table is sized for the compile-time max B-frame count, not the
 * runtime i_bframe, so iterate the full X264_BFRAME_MAX+2 square */
140 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
141 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
142 x264_free( frame->i_row_satds[i][j] );
143 x264_free( frame->i_row_bits );
144 x264_free( frame->i_row_qp );
145 x264_free( frame->mb_type );
146 x264_free( frame->mv[0] );
147 x264_free( frame->mv[1] );
148 x264_free( frame->ref[0] );
149 x264_free( frame->ref[1] );
150 x264_pthread_mutex_destroy( &frame->mutex );
151 x264_pthread_cond_destroy( &frame->cv );
/* Copy a user-supplied picture into an internal frame.  Only I420/YV12 input
 * is accepted; returns an error (exact value elided from this listing) for
 * any other colorspace.  Handles YV12's swapped U/V plane order and optional
 * vertical flip. */
155 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
157 int i_csp = src->img.i_csp & X264_CSP_MASK;
159 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
161 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
/* carry per-picture encode hints through to the internal frame */
165 dst->i_type = src->i_type;
166 dst->i_qpplus1 = src->i_qpplus1;
167 dst->i_pts = src->i_pts;
/* YV12 stores V before U: i^3 maps plane 1<->2, leaving luma (0) alone */
171 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
172 uint8_t *plane = src->img.plane[s];
173 int stride = src->img.i_stride[s];
/* chroma planes are half size in both dimensions (>> !!i) */
174 int width = h->param.i_width >> !!i;
175 int height = h->param.i_height >> !!i;
/* bottom-up input: start at the last row (stride is presumably negated in
 * an elided line so the copy walks upward — confirm) */
176 if( src->img.i_csp & X264_CSP_VFLIP )
178 plane += (height-1)*stride;
181 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate a plane's edge pixels into its padding border.  Horizontal
 * padding copies each row's first/last pixel outward; vertical padding
 * duplicates the whole (already horizontally padded) first/last rows.
 * b_pad_top/b_pad_bottom gate the vertical passes so a frame can be padded
 * incrementally row-strip by row-strip. */
188 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
190 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
192 for( y = 0; y < i_height; y++ )
/* left border: replicate column 0; right border: replicate the last column */
195 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
197 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* top border (when b_pad_top): copy row 0 upward, including its h-padding */
201 for( y = 0; y < i_padv; y++ )
202 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* bottom border (when b_pad_bottom): copy the last row downward */
205 for( y = 0; y < i_padv; y++ )
206 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Pad the reconstructed planes around the strip of macroblock rows just
 * finished (starting at mb_y), so motion search may read beyond the picture
 * edge.  b_end marks the final strip, which also pads the bottom border.
 * Under MBAFF the two fields are padded separately (stride*2, pix and
 * pix+stride). */
210 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
/* with MBAFF, only operate on mb-pair boundaries (odd rows are skipped —
 * the early return is elided from this listing) */
214 if( mb_y & h->sh.b_mbaff )
216 for( i = 0; i < frame->i_plane; i++ )
218 int stride = frame->i_stride[i];
219 int width = 16*h->sps->i_mb_width >> !!i;
220 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
221 int padh = PADH >> !!i;
222 int padv = PADV >> !!i;
223 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
224 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
/* non-first strips start 4 rows early (see above), so extend height to
 * still reach the picture bottom on the final strip */
225 if( b_end && !b_start )
226 height += 4 >> (!!i + h->sh.b_mbaff);
/* interlaced: pad each field independently using doubled stride */
229 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
230 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
234 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad the three half-pel filtered luma planes (filtered[1..3]) for the strip
 * at mb_y, analogous to x264_frame_expand_border but starting from the last
 * correctly-filtered pixel rather than the picture edge. */
239 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
241 /* during filtering, 8 extra pixels were filtered on each edge,
242 * but up to 3 of the horizontal ones may be wrong.
243 we want to expand border from the last filtered pixel */
245 int stride = frame->i_stride[0];
246 int width = 16*h->sps->i_mb_width + 8;
247 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
/* only the hpel planes; filtered[0] (the full-pel plane) is padded by
 * x264_frame_expand_border */
251 for( i = 1; i < 4; i++ )
253 // buffer: 8 luma, to match the hpel filter
254 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
/* interlaced: pad each field independently using doubled stride */
257 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
258 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
262 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad all four half-resolution lookahead planes in one shot (whole frame,
 * top and bottom both padded). */
267 void x264_frame_expand_border_lowres( x264_frame_t *frame )
270 for( i = 0; i < 4; i++ )
271 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* When the input resolution is not a multiple of 16, fill the gap between
 * the real picture edge and the mb-aligned plane edge by replicating the
 * last column/row, so the padding encodes cheaply. */
274 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
277 for( i = 0; i < frame->i_plane; i++ )
279 int i_subsample = i ? 1 : 0;
280 int i_width = h->param.i_width >> i_subsample;
281 int i_height = h->param.i_height >> i_subsample;
/* distance from the real edge to the mb-aligned edge, per plane */
282 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
283 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* right edge: replicate the last real pixel of each row (memset length
 * argument, presumably i_padx, is elided from this listing) */
287 for( y = 0; y < i_height; y++ )
288 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
289 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
/* bottom edge: replicate the last real row */
294 //FIXME interlace? or just let it pad using the wrong field
295 for( y = i_height; y < i_height + i_pady; y++ )
296 memcpy( &frame->plane[i][y*frame->i_stride[i]],
297 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
304 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
305 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* Save one mb row's luma nnz flags into buf, then smear each nonzero 4-byte
 * group to 0x01010101 so the deblocker sees per-8x8 nnz.  Restored afterward
 * by restore_cavlc_nnz_row.  NOTE(review): the visible smear is unconditional
 * here; presumably gated on the mb's 8x8-transform flag via `transform` in an
 * elided line — confirm. */
306 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
/* reinterpret the per-mb nnz array as 6 uint32 groups (4 luma + 2 chroma) */
308 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
309 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
311 for( x=0; x<h->sps->i_mb_width; x++ )
/* back up the 16 luma nnz bytes before clobbering them */
313 memcpy( buf+x, src+x, 16 );
/* any nonzero coeff in an 8x8 group marks all four 4x4 blocks nonzero */
316 if( src[x][0] ) src[x][0] = 0x01010101;
317 if( src[x][1] ) src[x][1] = 0x01010101;
318 if( src[x][2] ) src[x][2] = 0x01010101;
319 if( src[x][3] ) src[x][3] = 0x01010101;
/* Undo munge_cavlc_nnz_row: copy the backed-up 16 luma nnz bytes of each mb
 * in the row back over the smeared values. */
324 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
326 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
328 for( x=0; x<h->sps->i_mb_width; x++ )
329 memcpy( dst+x, buf+x, 16 );
/* Apply func (munge or restore) to the current mb row plus the neighboring
 * rows the deblocker touches; each row gets its own slice of buf.  The
 * conditions guarding the neighbor-row calls (presumably mb_y bounds / MBAFF
 * checks) are elided from this listing — confirm. */
332 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
334 func( h, mb_y, buf );
336 func( h, mb_y-1, buf + h->sps->i_mb_width );
339 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
341 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
346 /* Deblocking filter */
/* Standard H.264 deblocking threshold tables, indexed by clipped QP (0..51):
 * alpha/beta edge-detection thresholds and tc0 clipping values per bS-1. */
348 static const int i_alpha_table[52] =
350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
352 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
353 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
354 80, 90,101,113,127,144,162,182,203,226,
357 static const int i_beta_table[52] =
359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
360 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
361 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
362 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
363 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* tc0[qp][bS-1] for bS 1..3 (bS 4 uses the strong intra filter instead) */
366 static const int i_tc0_table[52][3] =
368 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
369 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
370 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
371 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
372 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
373 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
374 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
375 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
376 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* Clamp an int to [0,255] for pixel storage (body elided from this listing). */
380 static inline int clip_uint8( int a )
/* Normal-strength (bS 1..3) luma deblocking of one 16-pixel edge, processed
 * as 4 groups of 4 pixels each with its own tc0.  xstride steps across the
 * edge, ystride along it, so one routine serves both V and H wrappers.
 * tc0 < 0 (bS==0) skips a group — the skip check is elided from this
 * listing. */
388 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
391 for( i = 0; i < 4; i++ ) {
396 for( d = 0; d < 4; d++ ) {
/* three pixels each side of the edge: p2 p1 p0 | q0 q1 q2 */
397 const int p2 = pix[-3*xstride];
398 const int p1 = pix[-2*xstride];
399 const int p0 = pix[-1*xstride];
400 const int q0 = pix[ 0*xstride];
401 const int q1 = pix[ 1*xstride];
402 const int q2 = pix[ 2*xstride];
/* filter only where the gradient looks like blocking, not a real edge */
404 if( abs( p0 - q0 ) < alpha &&
405 abs( p1 - p0 ) < beta &&
406 abs( q1 - q0 ) < beta ) {
/* flat p side: also adjust p1 (and presumably extend tc — elided) */
411 if( abs( p2 - p0 ) < beta ) {
412 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
/* flat q side: also adjust q1 */
415 if( abs( q2 - q0 ) < beta ) {
416 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* core p0/q0 adjustment, clipped to +/-tc */
420 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
421 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
422 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Vertical-filter wrapper: edge runs horizontally, step across it by stride. */
428 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
430 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Horizontal-filter wrapper: edge runs vertically, step across it by 1. */
432 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
434 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Normal-strength chroma deblocking of one 8-pixel edge: 4 groups of 2
 * pixels, only p0/q0 are modified (chroma never touches p1/q1). */
437 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
440 for( i = 0; i < 4; i++ ) {
441 const int tc = tc0[i];
446 for( d = 0; d < 2; d++ ) {
447 const int p1 = pix[-2*xstride];
448 const int p0 = pix[-1*xstride];
449 const int q0 = pix[ 0*xstride];
450 const int q1 = pix[ 1*xstride];
/* same edge-vs-blocking test as luma */
452 if( abs( p0 - q0 ) < alpha &&
453 abs( p1 - p0 ) < beta &&
454 abs( q1 - q0 ) < beta ) {
456 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
457 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
458 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* Vertical/horizontal chroma wrappers, mirroring the luma pair above. */
464 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
466 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
468 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
470 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Strong (bS==4, intra) luma deblocking of one 16-pixel edge.  No tc
 * clipping; instead a stricter flatness test selects between the 3-pixel
 * strong filter and a weak p0/q0-only filter. */
473 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
476 for( d = 0; d < 16; d++ ) {
477 const int p2 = pix[-3*xstride];
478 const int p1 = pix[-2*xstride];
479 const int p0 = pix[-1*xstride];
480 const int q0 = pix[ 0*xstride];
481 const int q1 = pix[ 1*xstride];
482 const int q2 = pix[ 2*xstride];
/* filter only where the gradient looks like blocking, not a real edge */
484 if( abs( p0 - q0 ) < alpha &&
485 abs( p1 - p0 ) < beta &&
486 abs( q1 - q0 ) < beta ) {
/* small step across the edge: strong filtering allowed */
488 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
489 if( abs( p2 - p0 ) < beta)
491 const int p3 = pix[-4*xstride];
/* p side strong: rewrite p0,p1,p2 from a 4/5-tap smoothing */
493 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
494 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
495 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side not flat enough: weak p0-only filter */
498 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
500 if( abs( q2 - q0 ) < beta)
502 const int q3 = pix[3*xstride];
/* q side strong: rewrite q0,q1,q2 */
504 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
505 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
506 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
509 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* large step: weak filter on both sides only */
513 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
514 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Strong-filter vertical/horizontal luma wrappers (bS==4 edges). */
520 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
522 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
524 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
526 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Strong (bS==4) chroma deblocking of one 8-pixel edge: unconditional weak
 * filter on p0/q0 wherever the blocking test passes. */
529 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
532 for( d = 0; d < 8; d++ ) {
533 const int p1 = pix[-2*xstride];
534 const int p0 = pix[-1*xstride];
535 const int q0 = pix[ 0*xstride];
536 const int q1 = pix[ 1*xstride];
538 if( abs( p0 - q0 ) < alpha &&
539 abs( p1 - p0 ) < beta &&
540 abs( q1 - q0 ) < beta ) {
542 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
543 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Strong-filter vertical/horizontal chroma wrappers (bS==4 edges). */
549 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
551 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
553 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
555 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one edge: derive alpha/beta from QP plus the slice offsets, then
 * dispatch to the strong (intra) path for bS==4 or the tc-clipped normal
 * path otherwise (the bS==4 branch itself is elided from this listing).
 * For chroma, b_chroma==1 also bumps each tc by one per the spec. */
558 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
559 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
562 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
563 const int alpha = i_alpha_table[index_a];
564 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
/* tc = -1 marks "skip this 4-pixel group" for bS==0 */
569 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
570 pf_inter( pix, i_stride, alpha, beta, tc );
572 pf_intra( pix, i_stride, alpha, beta );
/* Deblock one macroblock row of the reconstructed frame (a field pair when
 * MBAFF).  For every mb: compute boundary strength bS per 4-pixel edge
 * segment, then filter vertical edges (i_dir==0) and horizontal edges
 * (i_dir==1) across luma and both chroma planes.  NOTE(review): many
 * structural lines (braces, `continue`s, the mb_x/mb_y stepping at loop
 * end, the bS=0/bS=2 assignments in the strength computation) are elided
 * from this listing. */
576 void x264_frame_deblock_row( x264_t *h, int mb_y )
/* strides into the per-frame 8x8 ref and 4x4 mv arrays */
578 const int s8x8 = 2 * h->mb.i_mb_stride;
579 const int s4x4 = 4 * h->mb.i_mb_stride;
580 const int b_interlaced = h->sh.b_mbaff;
/* interlaced halves the vertical mv threshold (field mvs) */
581 const int mvy_limit = 4 >> b_interlaced;
/* doubled strides when interlaced so filtering walks a single field */
584 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
585 h->fdec->i_stride[1] << b_interlaced,
586 h->fdec->i_stride[2] << b_interlaced };
/* CAVLC + 8x8 transform: temporarily rewrite nnz to per-8x8 granularity */
588 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
589 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
591 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
593 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
594 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
595 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
596 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP has no residual and one mv: only the mb-boundary edge matters */
597 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
600 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
601 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
602 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* bottom field of an MBAFF pair: rebase to interleave with the top field */
603 if( b_interlaced && (mb_y&1) )
605 i_pix_y[0] -= 15*h->fdec->i_stride[0];
606 i_pix_y[1] -= 7*h->fdec->i_stride[1];
607 i_pix_y[2] -= 7*h->fdec->i_stride[2];
610 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
612 /* i_dir == 0 -> vertical edge
613 * i_dir == 1 -> horizontal edge */
614 for( i_dir = 0; i_dir < 2; i_dir++ )
/* skip the frame-boundary edge (left column / top row, or top pair when
 * interlaced) */
616 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
619 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
621 int mbn_xy, mbn_8x8, mbn_4x4;
622 int bS[4]; /* filtering strength */
/* 8x8 transform: interior 4x4 edges (odd i_edge) are never filtered */
624 if( b_8x8_transform && (i_edge&1) )
/* neighbor mb indices: self for interior edges, left/top mb for edge 0 */
627 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
628 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
629 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
/* interlaced top-edge neighbor is one more mb row up (other field) */
631 if( b_interlaced && i_edge == 0 && i_dir == 1 )
633 mbn_xy -= h->mb.i_mb_stride;
638 /* *** Get bS for each 4px for the current edge *** */
/* intra on either side: bS 4 on the mb boundary, 3 on interior edges */
639 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
641 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
646 for( i = 0; i < 4; i++ )
/* (x,y): this side's 4x4 block; (xn,yn): the block across the edge */
648 int x = i_dir == 0 ? i_edge : i;
649 int y = i_dir == 0 ? i : i_edge;
650 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
651 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
/* residual on either side -> bS 2 (assignment elided from listing) */
653 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
654 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
660 /* FIXME: A given frame may occupy more than one position in
661 * the reference list. So we should compare the frame numbers,
662 * not the indices in the ref list.
663 * No harm yet, as we don't generate that case.*/
665 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
666 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
667 int i4p= mb_4x4+x+y*s4x4;
668 int i4q= mbn_4x4+xn+yn*s4x4;
/* differing refs or mv delta >= 1 full pel -> bS 1 (assignment elided) */
673 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
675 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
676 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
677 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
/* filter strength thresholds use the average QP across the edge */
689 i_qp = h->mb.qp[mb_xy];
690 i_qpn= h->mb.qp[mbn_xy];
/* vertical edge: h_* routines, offset 4*i_edge pixels right */
695 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
696 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
697 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
/* chroma QP goes through the chroma_qp table before averaging */
701 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
702 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
703 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
704 i_stride2[1], bS, i_qpc, 1,
705 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
706 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
707 i_stride2[2], bS, i_qpc, 1,
708 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
713 /* horizontal edge */
714 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
715 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
716 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
720 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
721 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
722 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
723 i_stride2[1], bS, i_qpc, 1,
724 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
725 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
726 i_stride2[2], bS, i_qpc, 1,
727 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
/* MBAFF: alternate between the two fields of the pair before advancing mb_x
 * (the mb_x increment itself is elided from this listing) */
734 if( !b_interlaced || (mb_y&1) )
736 mb_y ^= b_interlaced;
/* restore the original per-4x4 nnz values saved by the munge above */
739 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
740 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
/* Deblock the whole frame: one call per mb row (per row pair when MBAFF). */
743 void x264_frame_deblock( x264_t *h )
746 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
747 x264_frame_deblock_row( h, mb_y );
751 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
752 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
753 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
754 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
756 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
757 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
759 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
760 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* MMXEXT vertical luma filter: the asm kernel handles 8 pixels (2 tc0
 * entries) at a time, so run it twice to cover the full 16-pixel edge. */
762 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
764 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
765 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
771 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
772 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblocking function table: C fallbacks first, then override with
 * the fastest implementation the CPU flags allow (MMXEXT -> SSE2 -> AltiVec;
 * the surrounding arch #ifdefs are elided from this listing). */
775 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
777 pf->deblock_v_luma = deblock_v_luma_c;
778 pf->deblock_h_luma = deblock_h_luma_c;
779 pf->deblock_v_chroma = deblock_v_chroma_c;
780 pf->deblock_h_chroma = deblock_h_chroma_c;
781 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
782 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
783 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
784 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
787 if( cpu&X264_CPU_MMXEXT )
789 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
790 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
791 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
792 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
794 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
795 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
/* SSE2 luma routines supersede the MMXEXT ones when available */
797 if( cpu&X264_CPU_SSE2 )
799 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
800 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
806 if( cpu&X264_CPU_ALTIVEC )
808 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
809 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish encoding progress (threaded builds): record how many pixel rows of
 * this frame are reconstructed and wake all threads waiting on it.  The
 * mutex orders the store with x264_frame_cond_wait's read. */
818 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
820 x264_pthread_mutex_lock( &frame->mutex );
821 frame->i_lines_completed = i_lines_completed;
822 x264_pthread_cond_broadcast( &frame->cv );
823 x264_pthread_mutex_unlock( &frame->mutex );
/* Block until at least i_lines_completed rows of this frame are ready.
 * Standard condition-variable loop: re-check the predicate after each wake
 * to tolerate spurious wakeups. */
826 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
828 x264_pthread_mutex_lock( &frame->mutex );
829 while( frame->i_lines_completed < i_lines_completed )
830 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
831 x264_pthread_mutex_unlock( &frame->mutex );
/* Non-threaded build: progress signalling is a no-op (empty bodies elided
 * from this listing — presumably inside an #else branch; confirm). */
835 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
837 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
/* Append a frame to a NULL-terminated frame list: scan to the first empty
 * slot (the store into list[i] is elided from this listing). */
844 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
847 while( list[i] ) i++;
/* Remove and return the LAST frame of a NULL-terminated list (the detach and
 * return of list[i] are elided from this listing). */
851 x264_frame_t *x264_frame_pop( x264_frame_t **list )
856 while( list[i+1] ) i++;
/* Insert a frame at the FRONT of a NULL-terminated list: count the current
 * entries, then shift them up one slot (the shift loop and the store into
 * list[0] are elided from this listing). */
862 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
865 while( list[i] ) i++;
/* Remove and return the FIRST frame of a NULL-terminated list, sliding the
 * remaining entries down (the slide body and return are elided from this
 * listing). */
871 x264_frame_t *x264_frame_shift( x264_frame_t **list )
873 x264_frame_t *frame = list[0];
875 for( i = 0; list[i]; i++ )
/* Drop one reference to a frame; when the count hits zero, recycle it onto
 * the unused-frame pool.  The final assert guards against overflowing the
 * fixed-size pool array (its last slot must stay the NULL terminator). */
881 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
883 assert( frame->i_reference_count > 0 );
884 frame->i_reference_count--;
885 if( frame->i_reference_count == 0 )
886 x264_frame_push( h->frames.unused, frame );
887 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Obtain a frame with refcount 1: reuse one from the unused pool if any,
 * otherwise allocate fresh.  NOTE(review): the NULL check after
 * x264_frame_new appears to be elided from this listing; the assert would
 * dereference NULL on OOM — confirm against the full source. */
890 x264_frame_t *x264_frame_pop_unused( x264_t *h )
893 if( h->frames.unused[0] )
894 frame = x264_frame_pop( h->frames.unused );
896 frame = x264_frame_new( h );
897 assert( frame->i_reference_count == 0 );
898 frame->i_reference_count = 1;
902 void x264_frame_sort( x264_frame_t **list, int b_dts )
907 for( i = 0; list[i+1]; i++ )
909 int dtype = list[i]->i_type - list[i+1]->i_type;
910 int dtime = list[i]->i_frame - list[i+1]->i_frame;
911 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
915 XCHG( x264_frame_t*, list[i], list[i+1] );