/*****************************************************************************
 * frame.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: frame.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/
26 x264_frame_t *x264_frame_new( x264_t *h )
28 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31 int i_mb_count = h->mb.i_mb_count;
32 int i_stride, i_width, i_lines;
33 int i_padv = PADV << h->param.b_interlaced;
36 if( !frame ) return NULL;
38 memset( frame, 0, sizeof(x264_frame_t) );
40 /* allocate frame data (+64 for extra data for me) */
41 i_width = ( ( h->param.i_width + 15 ) & -16 );
42 i_stride = i_width + 2*PADH;
43 i_lines = ( ( h->param.i_height + 15 ) & -16 );
44 if( h->param.b_interlaced )
45 i_lines = ( i_lines + 31 ) & -32;
47 if( h->param.cpu&X264_CPU_CACHELINE_SPLIT )
49 int align = h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 64;
50 i_stride = (i_stride + align-1) & -align;
54 for( i = 0; i < 3; i++ )
56 frame->i_stride[i] = i_stride >> !!i;
57 frame->i_width[i] = i_width >> !!i;
58 frame->i_lines[i] = i_lines >> !!i;
61 luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
62 for( i = 1; i < 3; i++ )
64 CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
65 frame->plane[i] = (uint8_t*)frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
67 /* all 4 luma planes allocated together, since the cacheline split code
68 * requires them to be in-phase wrt cacheline alignment. */
69 CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
70 for( i = 0; i < 4; i++ )
71 frame->filtered[i] = (uint8_t*)frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
72 frame->plane[0] = frame->filtered[0];
74 if( h->frames.b_have_lowres )
76 frame->i_width_lowres = frame->i_width[0]/2;
77 frame->i_stride_lowres = frame->i_width_lowres + 2*PADH;
78 frame->i_lines_lowres = frame->i_lines[0]/2;
79 for( i = 0; i < 4; i++ )
81 CHECKED_MALLOC( frame->buffer_lowres[i],
82 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
83 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
84 frame->i_stride_lowres * i_padv + PADH;
88 if( h->param.analyse.i_me_method >= X264_ME_ESA )
90 CHECKED_MALLOC( frame->buffer[3],
91 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
92 frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
96 frame->i_type = X264_TYPE_AUTO;
100 frame->i_frame_num = -1;
101 frame->i_lines_completed = -1;
103 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
104 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
105 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
106 if( h->param.i_bframe )
108 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
109 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
114 frame->ref[1] = NULL;
117 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
118 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
119 for( i = 0; i < h->param.i_bframe + 2; i++ )
120 for( j = 0; j < h->param.i_bframe + 2; j++ )
121 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
123 x264_pthread_mutex_init( &frame->mutex, NULL );
124 x264_pthread_cond_init( &frame->cv, NULL );
129 x264_frame_delete( frame );
133 void x264_frame_delete( x264_frame_t *frame )
136 for( i = 0; i < 4; i++ )
137 x264_free( frame->buffer[i] );
138 for( i = 0; i < 4; i++ )
139 x264_free( frame->buffer_lowres[i] );
140 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
141 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
142 x264_free( frame->i_row_satds[i][j] );
143 x264_free( frame->i_row_bits );
144 x264_free( frame->i_row_qp );
145 x264_free( frame->mb_type );
146 x264_free( frame->mv[0] );
147 x264_free( frame->mv[1] );
148 x264_free( frame->ref[0] );
149 x264_free( frame->ref[1] );
150 x264_pthread_mutex_destroy( &frame->mutex );
151 x264_pthread_cond_destroy( &frame->cv );
155 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
157 int i_csp = src->img.i_csp & X264_CSP_MASK;
159 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
161 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
165 dst->i_type = src->i_type;
166 dst->i_qpplus1 = src->i_qpplus1;
167 dst->i_pts = src->i_pts;
171 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
172 uint8_t *plane = src->img.plane[s];
173 int stride = src->img.i_stride[s];
174 int width = h->param.i_width >> !!i;
175 int height = h->param.i_height >> !!i;
176 if( src->img.i_csp & X264_CSP_VFLIP )
178 plane += (height-1)*stride;
181 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate the edge pixels of a plane into its surrounding border.
 *
 * pix points at the top-left pixel of an i_width x i_height area inside a
 * buffer with row pitch i_stride. Each row gets i_padh copies of its first
 * pixel on the left and of its last pixel on the right. If b_pad_top is set,
 * the (already widened) first row is copied into the i_padv rows above; if
 * b_pad_bottom is set, the last row is copied into the i_padv rows below. */
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
    int y;
    for( y = 0; y < i_height; y++ )
    {
        /* left band */
        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
        /* right band */
        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
    }
    /* upper band */
    if( b_pad_top )
    {
        for( y = 0; y < i_padv; y++ )
            memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
    }
    /* lower band */
    if( b_pad_bottom )
    {
        for( y = 0; y < i_padv; y++ )
            memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
    }
#undef PPIXEL
}
210 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
214 if( mb_y & h->sh.b_mbaff )
216 for( i = 0; i < frame->i_plane; i++ )
218 int stride = frame->i_stride[i];
219 int width = 16*h->sps->i_mb_width >> !!i;
220 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
221 int padh = PADH >> !!i;
222 int padv = PADV >> !!i;
223 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
224 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
225 if( b_end && !b_start )
226 height += 4 >> (!!i + h->sh.b_mbaff);
229 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
230 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
234 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
239 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
241 /* during filtering, 8 extra pixels were filtered on each edge,
242 * but up to 3 of the horizontal ones may be wrong.
243 we want to expand border from the last filtered pixel */
245 int stride = frame->i_stride[0];
246 int width = 16*h->sps->i_mb_width + 8;
247 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
251 for( i = 1; i < 4; i++ )
253 // buffer: 8 luma, to match the hpel filter
254 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
257 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
258 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
262 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
267 void x264_frame_expand_border_lowres( x264_frame_t *frame )
270 for( i = 0; i < 4; i++ )
271 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
274 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
277 for( i = 0; i < frame->i_plane; i++ )
279 int i_subsample = i ? 1 : 0;
280 int i_width = h->param.i_width >> i_subsample;
281 int i_height = h->param.i_height >> i_subsample;
282 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
283 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
287 for( y = 0; y < i_height; y++ )
288 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
289 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
294 //FIXME interlace? or just let it pad using the wrong field
295 for( y = i_height; y < i_height + i_pady; y++ )
296 memcpy( &frame->plane[i][y*frame->i_stride[i]],
297 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
304 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
305 * entropy coding, but per 64 coeffs for the purpose of deblocking */
306 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
308 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
309 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
311 for( x=0; x<h->sps->i_mb_width; x++ )
313 memcpy( buf+x, src+x, 16 );
316 if( src[x][0] ) src[x][0] = 0x01010101;
317 if( src[x][1] ) src[x][1] = 0x01010101;
318 if( src[x][2] ) src[x][2] = 0x01010101;
319 if( src[x][3] ) src[x][3] = 0x01010101;
324 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
326 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
328 for( x=0; x<h->sps->i_mb_width; x++ )
329 memcpy( dst+x, buf+x, 16 );
332 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
334 func( h, mb_y, buf );
336 func( h, mb_y-1, buf + h->sps->i_mb_width );
339 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
341 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
/* Deblocking filter */

/* alpha threshold of the deblocking filter, indexed by the clipped
 * (qp + alpha offset) value in 0..51 */
static const int i_alpha_table[52] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
     7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
    255, 255
};
/* beta threshold of the deblocking filter, indexed by the clipped
 * (qp + beta offset) value in 0..51 */
static const int i_beta_table[52] =
{
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
     3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
    18, 18
};
/* tc0 clipping value, indexed by the clipped (qp + alpha offset) value in
 * 0..51 and by bS-1 for boundary strengths 1..3 */
static const int i_tc0_table[52][3] =
{
    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
    { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
    { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
    { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
    { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
    { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
    { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
    { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
};
/* Saturate a to the 8-bit pixel range: values below 0 become 0, values
 * above 255 become 255, everything else is returned unchanged. */
static inline int clip_uint8( int a )
{
    if( a < 0 )
        return 0;
    if( a > 255 )
        return 255;
    return a;
}
/* Normal-strength (bS < 4) luma filter across one 16-sample edge.
 *
 * xstride steps across the edge (p side at negative offsets, q side at
 * non-negative offsets); ystride steps along it. The edge is handled as four
 * groups of four lines, each with its own clipping value tc0[i]; a negative
 * tc0[i] leaves that group untouched. */
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];

            if( abs( p0 - q0 ) < alpha &&
                abs( p1 - p0 ) < beta &&
                abs( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int delta;

                /* each side that is smooth enough also gets p1/q1 adjusted
                 * and widens the clipping range for p0/q0 by one */
                if( abs( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( abs( q2 - q0 ) < beta ) {
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                /* multiply instead of shifting: q0 - p0 may be negative */
                delta = x264_clip3( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Normal luma filter with rows on either side of the edge: stride steps
 * across the edge, 1 steps along it. */
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
}
/* Normal luma filter with columns on either side of the edge: 1 steps
 * across the edge, stride steps along it. */
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
}
/* Normal-strength (bS < 4) chroma filter across one 8-sample edge.
 *
 * xstride steps across the edge, ystride along it. The edge is handled as
 * four groups of two lines, each clipped by tc0[i]; groups with tc0[i] <= 0
 * are left untouched. Only p0 and q0 are modified. */
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];

            if( abs( p0 - q0 ) < alpha &&
                abs( p1 - p0 ) < beta &&
                abs( q1 - q0 ) < beta ) {

                /* multiply instead of shifting: q0 - p0 may be negative */
                int delta = x264_clip3( ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = clip_uint8( p0 + delta );    /* p0' */
                pix[ 0*xstride] = clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Normal chroma filter with rows on either side of the edge. */
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
}
/* Normal chroma filter with columns on either side of the edge. */
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
}
/* Strong (intra) luma filter across one 16-sample edge.
 *
 * xstride steps across the edge (p side at negative offsets, q side at
 * non-negative offsets); ystride steps along it. A line is filtered only if
 * it passes the alpha/beta activity checks. When the step across the edge is
 * small, each smooth side gets three samples rewritten; otherwise only p0
 * and q0 are changed. All outputs are computed from the unfiltered values. */
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int d;
    for( d = 0; d < 16; d++ ) {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];

        if( abs( p0 - q0 ) < alpha &&
            abs( p1 - p0 ) < beta &&
            abs( q1 - q0 ) < beta ) {

            if( abs( p0 - q0 ) < ((alpha >> 2) + 2) ) {
                if( abs( p2 - p0 ) < beta )
                {
                    const int p3 = pix[-4*xstride];
                    /* p0', p1', p2' */
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                }
                else
                {
                    /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                }
                if( abs( q2 - q0 ) < beta )
                {
                    const int q3 = pix[3*xstride];
                    /* q0', q1', q2' */
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                }
                else
                {
                    /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
            }
            else
            {
                /* p0', q0' */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
/* Strong luma filter with rows on either side of the edge. */
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
/* Strong luma filter with columns on either side of the edge. */
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
/* Strong (intra) chroma filter across one 8-sample edge.
 *
 * xstride steps across the edge, ystride along it. Lines that pass the
 * alpha/beta activity checks get p0 and q0 replaced by a weighted average of
 * the unfiltered neighbours. */
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];

        if( abs( p0 - q0 ) < alpha &&
            abs( p1 - p0 ) < beta &&
            abs( q1 - q0 ) < beta ) {

            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2;   /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2;   /* q0' */
        }

        pix += ystride;
    }
}
/* Strong chroma filter with rows on either side of the edge. */
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
}
/* Strong chroma filter with columns on either side of the edge. */
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}
558 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
559 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
562 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
563 const int alpha = i_alpha_table[index_a];
564 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
569 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
570 pf_inter( pix, i_stride, alpha, beta, tc );
572 pf_intra( pix, i_stride, alpha, beta );
576 void x264_frame_deblock_row( x264_t *h, int mb_y )
578 const int s8x8 = 2 * h->mb.i_mb_stride;
579 const int s4x4 = 4 * h->mb.i_mb_stride;
580 const int b_interlaced = h->sh.b_mbaff;
581 const int mvy_limit = 4 >> b_interlaced;
584 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
585 h->fdec->i_stride[1] << b_interlaced,
586 h->fdec->i_stride[2] << b_interlaced };
588 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
589 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
591 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
593 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
594 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
595 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
596 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
597 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
600 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
601 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
602 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
603 if( b_interlaced && (mb_y&1) )
605 i_pix_y[0] -= 15*h->fdec->i_stride[0];
606 i_pix_y[1] -= 7*h->fdec->i_stride[1];
607 i_pix_y[2] -= 7*h->fdec->i_stride[2];
610 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
612 /* i_dir == 0 -> vertical edge
613 * i_dir == 1 -> horizontal edge */
614 for( i_dir = 0; i_dir < 2; i_dir++ )
616 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));
619 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )
621 int mbn_xy, mbn_8x8, mbn_4x4;
622 int bS[4]; /* filtering strength */
624 if( b_8x8_transform && (i_edge&1) )
627 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );
628 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );
629 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );
631 if( b_interlaced && i_edge == 0 && i_dir == 1 )
633 mbn_xy -= h->mb.i_mb_stride;
638 /* *** Get bS for each 4px for the current edge *** */
639 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )
641 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );
646 for( i = 0; i < 4; i++ )
648 int x = i_dir == 0 ? i_edge : i;
649 int y = i_dir == 0 ? i : i_edge;
650 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;
651 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;
653 if( h->mb.non_zero_count[mb_xy][block_idx_xy[x][y]] != 0 ||
654 h->mb.non_zero_count[mbn_xy][block_idx_xy[xn][yn]] != 0 )
660 /* FIXME: A given frame may occupy more than one position in
661 * the reference list. So we should compare the frame numbers,
662 * not the indices in the ref list.
663 * No harm yet, as we don't generate that case.*/
665 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;
666 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;
667 int i4p= mb_4x4+x+y*s4x4;
668 int i4q= mbn_4x4+xn+yn*s4x4;
673 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )
675 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||
676 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||
677 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )
689 i_qp = h->mb.qp[mb_xy];
690 i_qpn= h->mb.qp[mbn_xy];
695 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],
696 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
697 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );
701 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
702 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
703 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],
704 i_stride2[1], bS, i_qpc, 1,
705 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
706 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],
707 i_stride2[2], bS, i_qpc, 1,
708 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );
713 /* horizontal edge */
714 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],
715 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,
716 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );
720 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
721 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
722 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],
723 i_stride2[1], bS, i_qpc, 1,
724 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
725 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],
726 i_stride2[2], bS, i_qpc, 1,
727 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );
734 if( !b_interlaced || (mb_y&1) )
736 mb_y ^= b_interlaced;
739 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
740 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
743 void x264_frame_deblock( x264_t *h )
746 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
747 x264_frame_deblock_row( h, mb_y );
#ifdef HAVE_MMX
/* deblocking routines implemented outside this file */
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
#ifdef ARCH_X86
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );

/* The v8 routine is called once per 8-pixel half of the 16-pixel edge; the
 * second half uses the last two tc0 entries. */
void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
/* Same split for the strong (intra) filter. */
void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
{
    x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
#endif
#endif

#ifdef ARCH_PPC
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
#endif // ARCH_PPC
784 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
786 pf->deblock_v_luma = deblock_v_luma_c;
787 pf->deblock_h_luma = deblock_h_luma_c;
788 pf->deblock_v_chroma = deblock_v_chroma_c;
789 pf->deblock_h_chroma = deblock_h_chroma_c;
790 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
791 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
792 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
793 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
796 if( cpu&X264_CPU_MMXEXT )
798 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
799 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
800 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
801 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
803 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
804 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
805 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
806 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
808 if( cpu&X264_CPU_SSE2 )
810 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
811 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
812 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
813 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
819 if( cpu&X264_CPU_ALTIVEC )
821 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
822 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
831 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
833 x264_pthread_mutex_lock( &frame->mutex );
834 frame->i_lines_completed = i_lines_completed;
835 x264_pthread_cond_broadcast( &frame->cv );
836 x264_pthread_mutex_unlock( &frame->mutex );
839 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
841 x264_pthread_mutex_lock( &frame->mutex );
842 while( frame->i_lines_completed < i_lines_completed )
843 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
844 x264_pthread_mutex_unlock( &frame->mutex );
847 void x264_frame_size_estimated_set( x264_t *h, int bits )
849 x264_pthread_mutex_lock( &h->fenc->mutex );
850 x264_ratecontrol_set_estimated_size(h, bits);
851 x264_pthread_mutex_unlock( &h->fenc->mutex );
854 int x264_frame_size_estimated_get( x264_t const *h)
857 x264_pthread_mutex_lock( &h->fenc->mutex );
858 size = x264_ratecontrol_get_estimated_size(h);
859 x264_pthread_mutex_unlock( &h->fenc->mutex );
864 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
866 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
869 void x264_frame_size_estimated_set( x264_t *h, int bits )
871 x264_ratecontrol_set_estimated_size(h, bits);
874 int x264_frame_size_estimated_get( x264_t const *h)
877 size = x264_ratecontrol_set_estimated_size(h);
885 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
888 while( list[i] ) i++;
892 x264_frame_t *x264_frame_pop( x264_frame_t **list )
897 while( list[i+1] ) i++;
903 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
906 while( list[i] ) i++;
912 x264_frame_t *x264_frame_shift( x264_frame_t **list )
914 x264_frame_t *frame = list[0];
916 for( i = 0; list[i]; i++ )
922 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
924 assert( frame->i_reference_count > 0 );
925 frame->i_reference_count--;
926 if( frame->i_reference_count == 0 )
927 x264_frame_push( h->frames.unused, frame );
928 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
931 x264_frame_t *x264_frame_pop_unused( x264_t *h )
934 if( h->frames.unused[0] )
935 frame = x264_frame_pop( h->frames.unused );
937 frame = x264_frame_new( h );
938 assert( frame->i_reference_count == 0 );
939 frame->i_reference_count = 1;
943 void x264_frame_sort( x264_frame_t **list, int b_dts )
948 for( i = 0; list[i+1]; i++ )
950 int dtype = list[i]->i_type - list[i+1]->i_type;
951 int dtime = list[i]->i_frame - list[i+1]->i_frame;
952 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
956 XCHG( x264_frame_t*, list[i], list[i+1] );