1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 *****************************************************************************/
/* x264_frame_new: allocate an x264_frame_t together with its pixel planes,
 * half-pel "filtered" planes, optional lowres and integral planes, and the
 * per-macroblock / per-row side arrays. All sizes are derived from h->param
 * and h->mb.i_mb_count. Returns NULL if the initial x264_malloc fails.
 * NOTE(review): this listing omits several lines of the function (braces,
 * the declarations of i, j and luma_plane_size, an else branch, the success
 * return and the label in front of the final x264_frame_delete call). The
 * comments below describe only the statements that are visible. */
26 x264_frame_t *x264_frame_new( x264_t *h )
28 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
31 int i_mb_count = h->mb.i_mb_count;
32 int i_stride, i_width, i_lines;
/* vertical padding is PADV, doubled when h->param.b_interlaced is 1 */
33 int i_padv = PADV << h->param.b_interlaced;
36 if( !frame ) return NULL;
/* start from an all-zero struct so that unallocated pointers are NULL */
38 memset( frame, 0, sizeof(x264_frame_t) );
40 /* allocate frame data (+64 for extra data for me) */
/* width and height are rounded up to a multiple of 16; the stride adds
 * PADH bytes of padding on each side of a row */
41 i_width = ( ( h->param.i_width + 15 ) & -16 );
42 i_stride = i_width + 2*PADH;
43 i_lines = ( ( h->param.i_height + 15 ) & -16 );
/* interlaced: round the height up to a multiple of 32 instead */
44 if( h->param.b_interlaced )
45 i_lines = ( i_lines + 31 ) & -32;
/* round the stride up to the cacheline size advertised by the cpu flags */
47 if( h->param.cpu&X264_CPU_CACHELINE_64 )
48 i_stride = (i_stride + 63) & ~63;
49 else if( h->param.cpu&X264_CPU_CACHELINE_32 )
50 i_stride = (i_stride + 31) & ~31;
/* plane 0 keeps the full dimensions; planes 1 and 2 get half (>> !!i) */
53 for( i = 0; i < 3; i++ )
55 frame->i_stride[i] = i_stride >> !!i;
56 frame->i_width[i] = i_width >> !!i;
57 frame->i_lines[i] = i_lines >> !!i;
/* bytes in one padded luma plane: i_padv extra rows above and below */
60 luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
/* planes 1 and 2: a quarter of the luma size each; plane[] points past the
 * top and left padding */
61 for( i = 1; i < 3; i++ )
63 CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
64 frame->plane[i] = (uint8_t*)frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
66 /* all 4 luma planes allocated together, since the cacheline split code
67 * requires them to be in-phase wrt cacheline alignment. */
68 CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
/* filtered[i] is the first visible pixel of the i-th luma-sized plane;
 * plane[0] aliases filtered[0] */
69 for( i = 0; i < 4; i++ )
70 frame->filtered[i] = (uint8_t*)frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
71 frame->plane[0] = frame->filtered[0];
/* optional half-resolution planes: four buffers, stride rounded up to 16 */
73 if( h->frames.b_have_lowres )
75 frame->i_width_lowres = frame->i_width[0]/2;
76 frame->i_stride_lowres = (frame->i_width_lowres + 2*PADH + 15) & ~15;
77 frame->i_lines_lowres = frame->i_lines[0]/2;
78 for( i = 0; i < 4; i++ )
80 CHECKED_MALLOC( frame->buffer_lowres[i],
81 frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv ) );
82 frame->lowres[i] = ((uint8_t*)frame->buffer_lowres[i]) +
83 frame->i_stride_lowres * i_padv + PADH;
/* integral plane (uint16_t samples), only for motion search methods at or
 * above X264_ME_ESA; buffer[3] holds twice the padded luma sample count */
87 if( h->param.analyse.i_me_method >= X264_ME_ESA )
89 CHECKED_MALLOC( frame->buffer[3],
90 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
91 frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
/* default metadata for a frame that has not been used yet */
95 frame->i_type = X264_TYPE_AUTO;
99 frame->i_frame_num = -1;
100 frame->i_lines_completed = -1;
/* per-macroblock arrays: 1 type byte, 2*16 int16_t of mv and 4 ref bytes
 * per macroblock; the list-1 arrays are allocated only if B-frames are on */
102 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
103 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
104 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
105 if( h->param.i_bframe )
107 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
108 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
/* NOTE(review): presumably the else branch of the i_bframe test - confirm */
113 frame->ref[1] = NULL;
/* per-row arrays, one int per 16 lines; i_row_satds is an
 * (i_bframe+2) x (i_bframe+2) grid of such arrays */
116 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
117 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
118 for( i = 0; i < h->param.i_bframe + 2; i++ )
119 for( j = 0; j < h->param.i_bframe + 2; j++ )
120 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
/* synchronisation objects used by x264_frame_cond_broadcast / _wait */
122 x264_pthread_mutex_init( &frame->mutex, NULL );
123 x264_pthread_cond_init( &frame->cv, NULL );
/* NOTE(review): presumably the failure path reached from CHECKED_MALLOC;
 * the label and the following return are not visible here - confirm */
128 x264_frame_delete( frame );
/* x264_frame_delete: release every buffer hanging off a frame and destroy
 * its mutex and condition variable.
 * The loops cover the full static array sizes (4 buffers, 4 lowres buffers,
 * (X264_BFRAME_MAX+2)^2 satd rows), so slots that were never allocated are
 * passed to x264_free as well; x264_frame_new zero-fills the struct, so
 * those are NULL.
 * NOTE(review): assumes x264_free accepts NULL - confirm. No free of the
 * frame struct itself is visible in this listing - check the full source. */
132 void x264_frame_delete( x264_frame_t *frame )
135 for( i = 0; i < 4; i++ )
136 x264_free( frame->buffer[i] );
137 for( i = 0; i < 4; i++ )
138 x264_free( frame->buffer_lowres[i] );
139 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
140 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
141 x264_free( frame->i_row_satds[i][j] );
142 x264_free( frame->i_row_bits );
143 x264_free( frame->i_row_qp );
144 x264_free( frame->mb_type );
145 x264_free( frame->mv[0] );
146 x264_free( frame->mv[1] );
147 x264_free( frame->ref[0] );
148 x264_free( frame->ref[1] );
149 x264_pthread_mutex_destroy( &frame->mutex );
150 x264_pthread_cond_destroy( &frame->cv );
/* x264_frame_copy_picture: copy the metadata (type, qpplus1, pts) and the
 * pixel planes of a user-supplied picture into an internal frame.
 * Only the I420 and YV12 colourspaces are accepted; anything else is logged
 * as an error.
 * NOTE(review): the return statements and the per-plane loop header are not
 * visible in this listing. */
154 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
/* strip the flag bits (e.g. X264_CSP_VFLIP) to get the bare colourspace */
156 int i_csp = src->img.i_csp & X264_CSP_MASK;
158 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
160 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
164 dst->i_type = src->i_type;
165 dst->i_qpplus1 = src->i_qpplus1;
166 dst->i_pts = src->i_pts;
/* for YV12 the source indices of planes 1 and 2 are swapped (i^3 maps
 * 1<->2); plane 0 is never remapped */
170 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
171 uint8_t *plane = src->img.plane[s];
172 int stride = src->img.i_stride[s];
/* planes other than 0 are copied at half width and half height */
173 int width = h->param.i_width >> !!i;
174 int height = h->param.i_height >> !!i;
/* vertical flip: start reading at the last source row
 * NOTE(review): the stride is presumably negated on a hidden line - confirm */
175 if( src->img.i_csp & X264_CSP_VFLIP )
177 plane += (height-1)*stride;
180 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* plane_expand_border: replicate the edge pixels of an i_width x i_height
 * region outwards into its padding.
 *   pix          first pixel of the region (padding lies at negative offsets)
 *   i_stride     distance in bytes between consecutive rows
 *   i_padh/padv  amount of padding to fill horizontally / vertically
 *   b_pad_top, b_pad_bottom
 *                NOTE(review): presumably guard the two vertical loops; the
 *                tests themselves are not visible in this listing.
 * Note that the PPIXEL macro is defined inside the function; no matching
 * #undef is visible here. */
187 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
189 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
/* horizontal pass: fill the left padding with the first pixel of each row
 * and the right padding with the last pixel of each row */
191 for( y = 0; y < i_height; y++ )
194 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
196 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* rows above: copies of row 0, including its horizontal padding */
200 for( y = 0; y < i_padv; y++ )
201 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* rows below: copies of the last row, including its horizontal padding */
204 for( y = 0; y < i_padv; y++ )
205 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* x264_frame_expand_border: pad the borders of every plane of a frame for
 * the macroblock row mb_y (or, when b_end is set, for all rows from mb_y to
 * the bottom of the picture).
 * NOTE(review): the declaration of b_start and the branch that selects
 * between the two-call and one-call forms at the end are not visible in
 * this listing. */
209 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
/* true only for an odd mb_y while h->sh.b_mbaff is set
 * NOTE(review): the statement guarded by this test is hidden - presumably
 * an early return */
213 if( mb_y & h->sh.b_mbaff )
215 for( i = 0; i < frame->i_plane; i++ )
217 int stride = frame->i_stride[i];
/* sizes are halved (>> !!i) for every plane other than plane 0 */
218 int width = 16*h->sps->i_mb_width >> !!i;
219 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
220 int padh = PADH >> !!i;
221 int padv = PADV >> !!i;
222 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
/* start 4 luma lines above the row, clamped so the first row starts at 0 */
223 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
224 if( b_end && !b_start )
225 height += 4 >> (!!i + h->sh.b_mbaff);
/* two calls with a doubled stride, starting on consecutive lines: each call
 * touches every other line
 * NOTE(review): presumably the mbaff (field) case - confirm */
228 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
229 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single call covering consecutive lines */
233 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* x264_frame_expand_border_filtered: pad the borders of the three
 * interpolated planes frame->filtered[1..3] for macroblock row mb_y (or for
 * the rest of the picture when b_end is set).
 * NOTE(review): the declarations of padh, padv and b_start and the branch
 * that selects between the two-call and one-call forms are not visible in
 * this listing. */
238 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
240 /* during filtering, 8 extra pixels were filtered on each edge,
241 * but up to 3 of the horizontal ones may be wrong.
242 we want to expand border from the last filtered pixel */
244 int stride = frame->i_stride[0];
/* 8 pixels wider than the macroblock area; pix below starts 4 to the left */
245 int width = 16*h->sps->i_mb_width + 8;
246 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
/* filtered[0] is skipped: the loop starts at index 1 */
250 for( i = 1; i < 4; i++ )
252 // buffer: 8 luma, to match the hpel filter
253 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
/* doubled stride, two starting lines: each call touches every other line */
256 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
257 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
/* single call covering consecutive lines */
261 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* x264_frame_expand_border_lowres: pad all four lowres planes of a frame.
 * Top and bottom padding are both requested (last two arguments are 1).
 * The width passed is the lowres stride minus the horizontal padding on
 * both sides, not frame->i_width_lowres. */
266 void x264_frame_expand_border_lowres( x264_frame_t *frame )
269 for( i = 0; i < 4; i++ )
270 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* x264_frame_expand_border_mod16: extend each plane from the picture size in
 * h->param up to the macroblock-aligned size (i_mb_width*16 by
 * i_mb_height*16) by repeating the last column and the last row.
 * NOTE(review): the final argument of the memset and memcpy calls and the
 * tests around the two loops are not visible in this listing. */
273 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
276 for( i = 0; i < frame->i_plane; i++ )
/* planes other than plane 0 use halved sizes */
278 int i_subsample = i ? 1 : 0;
279 int i_width = h->param.i_width >> i_subsample;
280 int i_height = h->param.i_height >> i_subsample;
/* number of missing columns / rows up to the macroblock-aligned size */
281 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
282 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* right edge: fill from column i_width with the pixel at column i_width-1 */
286 for( y = 0; y < i_height; y++ )
287 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
288 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
293 //FIXME interlace? or just let it pad using the wrong field
/* bottom edge: each added row is a copy of row i_height-1 */
294 for( y = i_height; y < i_height + i_pady; y++ )
295 memcpy( &frame->plane[i][y*frame->i_stride[i]],
296 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
303 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
304 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* munge_cavlc_nnz_row: for every macroblock of row mb_y, save the first 16
 * non_zero_count bytes into buf[x] and then overwrite them in place with a
 * coarser form.
 * NOTE(review): the declaration of nnz and the test that decides which
 * macroblocks are rewritten are hidden; `transform` is presumably read by
 * that test - confirm. */
305 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
/* view the per-macroblock counts as six 32-bit words each (restore_cavlc_nnz_row
 * indexes the same array as uint8_t[24]) */
307 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
308 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
310 for( x=0; x<h->sps->i_mb_width; x++ )
/* back up the original 16 bytes so they can be restored afterwards */
312 memcpy( buf+x, src+x, 16 );
/* words 0 and 1 are OR-ed together and both replaced by the same value:
 * each 16-bit half becomes 0x0101 if that half of the OR is non-zero,
 * otherwise 0; words 2 and 3 are treated the same way */
315 nnz = src[x][0] | src[x][1];
316 src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
317 nnz = src[x][2] | src[x][3];
318 src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
/* restore_cavlc_nnz_row: copy the 16 saved bytes in buf[x] back over the
 * start of each macroblock's 24-byte non_zero_count entry in row mb_y.
 * This is the counterpart of the memcpy in munge_cavlc_nnz_row. */
323 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
325 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
327 for( x=0; x<h->sps->i_mb_width; x++ )
328 memcpy( dst+x, buf+x, 16 );
/* munge_cavlc_nnz: apply func (a munge or restore row routine) to row mb_y
 * and to the neighbouring rows mb_y-1, mb_y+1 and mb_y-2. Each row gets its
 * own slice of buf, i_mb_width entries apart.
 * NOTE(review): the tests that decide which neighbouring rows are processed
 * are not visible in this listing. */
331 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
333 func( h, mb_y, buf );
335 func( h, mb_y-1, buf + h->sps->i_mb_width );
338 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
340 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
345 /* Deblocking filter */
/* alpha threshold of the deblocking filter; deblock_edge indexes it with
 * i_qp + h->sh.i_alpha_c0_offset clipped to 0..51.
 * (The last entries of the initialiser are not visible in this listing.) */
347 static const int i_alpha_table[52] =
349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
350 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
351 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
352 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
353 80, 90,101,113,127,144,162,182,203,226,
/* beta threshold of the deblocking filter; deblock_edge indexes it with
 * i_qp + h->sh.i_beta_offset clipped to 0..51.
 * (The last entries of the initialiser are not visible in this listing.) */
356 static const int i_beta_table[52] =
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
359 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
360 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
361 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
362 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
/* tc0 clipping values; deblock_edge reads i_tc0_table[index_a][bS-1], where
 * index_a is the same clipped index used for i_alpha_table and bS is a
 * non-zero filtering strength. */
365 static const int i_tc0_table[52][3] =
367 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
368 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
369 { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
370 { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
371 { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
372 { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
373 { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
374 { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
375 { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 }
/* clip_uint8: helper applied to the filtered p0/q0 sums before they are
 * stored back into uint8_t pixels by deblock_luma_c and deblock_chroma_c.
 * NOTE(review): the body is not visible in this listing; going by the name
 * it presumably clamps its argument to 0..255 - confirm. */
379 static inline int clip_uint8( int a )
/* deblock_luma_c: C reference for the normal (non-intra) luma edge filter.
 *   pix      first q0 sample of the edge; p samples lie at negative
 *            multiples of xstride
 *   xstride  step across the edge (from p towards q)
 *   ystride  step along the edge
 *            NOTE(review): the pointer advance using it is on a hidden line
 *   alpha, beta  activity thresholds
 *   tc0      four clipping values, one per group of four samples (tc0[i])
 * NOTE(review): the declarations of i, d, tc and delta, the handling of the
 * tc0 value at the top of the outer loop and the updates of tc are not
 * visible in this listing. */
387 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
390 for( i = 0; i < 4; i++ ) {
395 for( d = 0; d < 4; d++ ) {
396 const int p2 = pix[-3*xstride];
397 const int p1 = pix[-2*xstride];
398 const int p0 = pix[-1*xstride];
399 const int q0 = pix[ 0*xstride];
400 const int q1 = pix[ 1*xstride];
401 const int q2 = pix[ 2*xstride];
/* filter only where the step across the edge and the gradients on both
 * sides are below the thresholds */
403 if( abs( p0 - q0 ) < alpha &&
404 abs( p1 - p0 ) < beta &&
405 abs( q1 - q0 ) < beta ) {
/* p1 (resp. q1) is also adjusted when p2 (resp. q2) is close to p0 (resp.
 * q0); the correction is clipped to +/- tc0[i] */
410 if( abs( p2 - p0 ) < beta ) {
411 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
414 if( abs( q2 - q0 ) < beta ) {
415 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* the correction clipped to +/- tc is added to p0 and subtracted from q0 */
419 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
420 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
421 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* deblock_v_luma_c: normal luma filter with xstride = stride and
 * ystride = 1, i.e. p and q samples are taken from rows above and below.
 * x264_frame_deblock_row uses the deblock_v_luma slot for horizontal edges. */
427 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
429 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* deblock_h_luma_c: normal luma filter with xstride = 1 and
 * ystride = stride, i.e. p and q samples are taken from the same row.
 * x264_frame_deblock_row uses the deblock_h_luma slot for vertical edges. */
431 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
433 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* deblock_chroma_c: C reference for the normal (non-intra) chroma edge
 * filter. Same parameters as deblock_luma_c, but each of the four tc0
 * entries covers a group of two samples, and only p0 and q0 are modified.
 * NOTE(review): the declarations of i and d, any early-out at the top of the
 * outer loop and the pointer advance along the edge are not visible in this
 * listing. */
436 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
439 for( i = 0; i < 4; i++ ) {
440 const int tc = tc0[i];
445 for( d = 0; d < 2; d++ ) {
446 const int p1 = pix[-2*xstride];
447 const int p0 = pix[-1*xstride];
448 const int q0 = pix[ 0*xstride];
449 const int q1 = pix[ 1*xstride];
/* same threshold test as in the luma filter */
451 if( abs( p0 - q0 ) < alpha &&
452 abs( p1 - p0 ) < beta &&
453 abs( q1 - q0 ) < beta ) {
455 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
456 pix[-1*xstride] = clip_uint8( p0 + delta ); /* p0' */
457 pix[ 0*xstride] = clip_uint8( q0 - delta ); /* q0' */
/* deblock_v_chroma_c: normal chroma filter with xstride = stride and
 * ystride = 1 (p and q samples come from rows above and below). */
463 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
465 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* deblock_h_chroma_c: normal chroma filter with xstride = 1 and
 * ystride = stride (p and q samples come from the same row). */
467 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
469 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* deblock_luma_intra_c: C reference for the strong luma edge filter, which
 * takes no tc0 clipping values. It processes 16 sample positions along the
 * edge; xstride steps across the edge and ystride along it.
 * NOTE(review): the else keywords, braces and the pointer advance are not
 * visible in this listing, so the pairing of the assignments below with
 * their conditions is inferred from the order of the lines. */
472 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
475 for( d = 0; d < 16; d++ ) {
476 const int p2 = pix[-3*xstride];
477 const int p1 = pix[-2*xstride];
478 const int p0 = pix[-1*xstride];
479 const int q0 = pix[ 0*xstride];
480 const int q1 = pix[ 1*xstride];
481 const int q2 = pix[ 2*xstride];
483 if( abs( p0 - q0 ) < alpha &&
484 abs( p1 - p0 ) < beta &&
485 abs( q1 - q0 ) < beta ) {
/* tighter test on the step across the edge: (alpha >> 2) + 2 */
487 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) ){
/* p side: rewrite p0, p1 and p2 when p2 is close to p0 ... */
488 if( abs( p2 - p0 ) < beta)
490 const int p3 = pix[-4*xstride];
492 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
493 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
494 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* ... NOTE(review): presumably the else branch - only p0 is rewritten */
497 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
/* q side: mirror image of the p side */
499 if( abs( q2 - q0 ) < beta)
501 const int q3 = pix[3*xstride];
503 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
504 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
505 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
508 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* NOTE(review): presumably the else branch of the tighter test above -
 * only p0 and q0 are rewritten */
512 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
513 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* deblock_v_luma_intra_c: strong luma filter with xstride = stride and
 * ystride = 1 (p and q samples come from rows above and below). */
519 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
521 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* deblock_h_luma_intra_c: strong luma filter with xstride = 1 and
 * ystride = stride (p and q samples come from the same row). */
523 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
525 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* deblock_chroma_intra_c: C reference for the strong chroma edge filter.
 * It processes 8 sample positions along the edge and rewrites only p0 and
 * q0, with no clipping values.
 * NOTE(review): the declaration of d and the pointer advance along the edge
 * are not visible in this listing. */
528 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
531 for( d = 0; d < 8; d++ ) {
532 const int p1 = pix[-2*xstride];
533 const int p0 = pix[-1*xstride];
534 const int q0 = pix[ 0*xstride];
535 const int q1 = pix[ 1*xstride];
537 if( abs( p0 - q0 ) < alpha &&
538 abs( p1 - p0 ) < beta &&
539 abs( q1 - q0 ) < beta ) {
541 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
542 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* deblock_v_chroma_intra_c: strong chroma filter with xstride = stride and
 * ystride = 1 (p and q samples come from rows above and below). */
548 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
550 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* deblock_h_chroma_intra_c: strong chroma filter with xstride = 1 and
 * ystride = stride (p and q samples come from the same row). */
552 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
554 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* deblock_edge: filter one 16-sample luma edge or 8-sample chroma edge.
 *   pix, i_stride  edge position and plane stride, passed through unchanged
 *   bS             four filtering strengths, one per group of samples
 *   i_qp           qp used for the threshold lookups
 *   b_chroma       added to every tc value (1 for chroma, 0 for luma at the
 *                  call sites in x264_frame_deblock_row)
 *   pf_inter       normal filter, receives the tc array
 *   pf_intra       strong filter, receives no tc array
 * NOTE(review): the declaration of tc and i and the test that chooses
 * between pf_inter and pf_intra are not visible in this listing. */
557 static inline void deblock_edge( x264_t *h, uint8_t *pix, int i_stride, int bS[4], int i_qp, int b_chroma,
558 x264_deblock_inter_t pf_inter, x264_deblock_intra_t pf_intra )
/* table indices are qp plus the slice offsets, clipped to 0..51 */
561 const int index_a = x264_clip3( i_qp + h->sh.i_alpha_c0_offset, 0, 51 );
562 const int alpha = i_alpha_table[index_a];
563 const int beta = i_beta_table[x264_clip3( i_qp + h->sh.i_beta_offset, 0, 51 )];
/* bS == 0 yields -1 (before b_chroma is added); otherwise the table value
 * for strength bS */
568 tc[i] = (bS[i] ? i_tc0_table[index_a][bS[i] - 1] : -1) + b_chroma;
569 pf_inter( pix, i_stride, alpha, beta, tc );
571 pf_intra( pix, i_stride, alpha, beta );
/* x264_frame_deblock_row: run the deblocking filter over the macroblocks of
 * row mb_y of the reconstructed frame h->fdec. For each macroblock the
 * deblock_dir macro derives a filtering strength bS for every edge and
 * calls deblock_edge for the luma plane and both chroma planes.
 * NOTE(review): this listing omits many lines of the function (braces,
 * else branches, the bS assignments inside the macro, the macro
 * invocations and the mb_x increment). The added comments cover only what
 * is visible; nothing was added inside the backslash-continued macro. */
575 void x264_frame_deblock_row( x264_t *h, int mb_y )
/* strides of the 8x8-block (ref) and 4x4-block (mv) arrays per MB row unit */
577 const int s8x8 = 2 * h->mb.i_mb_stride;
578 const int s4x4 = 4 * h->mb.i_mb_stride;
579 const int b_interlaced = h->sh.b_mbaff;
/* vertical mv difference threshold: 4, or 2 when b_mbaff is set */
580 const int mvy_limit = 4 >> b_interlaced;
/* plane strides, doubled when b_mbaff is set */
583 int i_stride2[3] = { h->fdec->i_stride[0] << b_interlaced,
584 h->fdec->i_stride[1] << b_interlaced,
585 h->fdec->i_stride[2] << b_interlaced };
/* CAVLC with the 8x8 transform: rewrite the nnz counts for deblocking;
 * they are restored by the matching call at the end of the function */
587 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
588 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
/* no increment in the loop header: mb_x is advanced on a hidden line */
590 for( mb_x = 0; mb_x < h->sps->i_mb_width; )
592 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
593 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
594 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
595 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* P_SKIP macroblocks only have their outer edge (index 0) considered */
596 const int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
/* offset of the macroblock's top-left sample in each plane */
599 int i_pix_y[3] = { 16*mb_y*h->fdec->i_stride[0] + 16*mb_x,
600 8*mb_y*h->fdec->i_stride[1] + 8*mb_x,
601 8*mb_y*h->fdec->i_stride[2] + 8*mb_x };
/* odd rows with b_mbaff set: move back 15 luma / 7 chroma lines, i.e. to
 * line 1 of the macroblock row above */
602 if( b_interlaced && (mb_y&1) )
604 i_pix_y[0] -= 15*h->fdec->i_stride[0];
605 i_pix_y[1] -= 7*h->fdec->i_stride[1];
606 i_pix_y[2] -= 7*h->fdec->i_stride[2];
609 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
611 /* i_dir == 0 -> vertical edge
612 * i_dir == 1 -> horizontal edge */
614 #define deblock_dir(i_dir)\
616 int i_start = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
618 for( i_edge = i_start; i_edge < i_edge_end; i_edge++ )\
620 int mbn_xy, mbn_8x8, mbn_4x4;\
621 int bS[4]; /* filtering strength */\
622 if( b_8x8_transform && (i_edge&1) )\
624 mbn_xy = i_edge > 0 ? mb_xy : ( i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride );\
625 mbn_8x8 = i_edge > 0 ? mb_8x8 : ( i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8 );\
626 mbn_4x4 = i_edge > 0 ? mb_4x4 : ( i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4 );\
627 if( b_interlaced && i_edge == 0 && i_dir == 1 )\
629 mbn_xy -= h->mb.i_mb_stride;\
630 mbn_8x8 -= 2 * s8x8;\
631 mbn_4x4 -= 4 * s4x4;\
633 /* *** Get bS for each 4px for the current edge *** */\
634 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy] ) )\
635 bS[0] = bS[1] = bS[2] = bS[3] = ( i_edge == 0 && !(b_interlaced && i_dir) ? 4 : 3 );\
639 for( i = 0; i < 4; i++ )\
641 int x = i_dir == 0 ? i_edge : i;\
642 int y = i_dir == 0 ? i : i_edge;\
643 int xn = (x - (i_dir == 0 ? 1 : 0 ))&0x03;\
644 int yn = (y - (i_dir == 0 ? 0 : 1 ))&0x03;\
645 if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
646 h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
652 /* FIXME: A given frame may occupy more than one position in\
653 * the reference list. So we should compare the frame numbers,\
654 * not the indices in the ref list.\
655 * No harm yet, as we don't generate that case.*/\
656 int i8p= mb_8x8+(x/2)+(y/2)*s8x8;\
657 int i8q= mbn_8x8+(xn/2)+(yn/2)*s8x8;\
658 int i4p= mb_4x4+x+y*s4x4;\
659 int i4q= mbn_4x4+xn+yn*s4x4;\
662 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
664 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
665 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
666 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
675 /* *** filter *** */\
677 i_qp = h->mb.qp[mb_xy];\
678 i_qpn= h->mb.qp[mbn_xy];\
682 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge],\
683 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,\
684 h->loopf.deblock_h_luma, h->loopf.deblock_h_luma_intra );\
688 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +\
689 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;\
690 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge],\
691 i_stride2[1], bS, i_qpc, 1,\
692 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );\
693 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge],\
694 i_stride2[2], bS, i_qpc, 1,\
695 h->loopf.deblock_h_chroma, h->loopf.deblock_h_chroma_intra );\
700 /* horizontal edge */\
701 deblock_edge( h, &h->fdec->plane[0][i_pix_y[0] + 4*i_edge*i_stride2[0]],\
702 i_stride2[0], bS, (i_qp+i_qpn+1) >> 1, 0,\
703 h->loopf.deblock_v_luma, h->loopf.deblock_v_luma_intra );\
707 int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +\
708 i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;\
709 deblock_edge( h, &h->fdec->plane[1][i_pix_y[1] + 2*i_edge*i_stride2[1]],\
710 i_stride2[1], bS, i_qpc, 1,\
711 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );\
712 deblock_edge( h, &h->fdec->plane[2][i_pix_y[2] + 2*i_edge*i_stride2[2]],\
713 i_stride2[2], bS, i_qpc, 1,\
714 h->loopf.deblock_v_chroma, h->loopf.deblock_v_chroma_intra );\
/* NOTE(review): presumably this test guards the mb_x increment, so that with
 * b_mbaff set both rows of a pair are visited before moving right; the XOR
 * below toggles mb_y between the two rows of the pair - confirm */
724 if( !b_interlaced || (mb_y&1) )
726 mb_y ^= b_interlaced;
/* undo the nnz rewrite performed at the top of the function */
729 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
730 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
/* x264_frame_deblock: deblock the whole frame by calling
 * x264_frame_deblock_row for every macroblock row; the row index advances by
 * 2 when h->sh.b_mbaff is set and by 1 otherwise. */
733 void x264_frame_deblock( x264_t *h )
736 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
737 x264_frame_deblock_row( h, mb_y );
/* Prototypes of the MMXEXT and SSE2 deblocking routines that
 * x264_deblock_init installs; their definitions are not in the visible part
 * of this file. The v8 variants are used by the two wrapper functions that
 * follow, which call them twice, 8 pixels apart.
 * NOTE(review): any preprocessor guards around these declarations are not
 * visible in this listing. */
741 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
742 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
743 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
744 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
746 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
747 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
748 void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
749 void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
751 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
752 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
753 void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
754 void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* x264_deblock_v_luma_mmxext: normal luma filter built from two calls to the
 * v8 routine. The second call starts 8 pixels further along and uses the
 * second pair of tc0 entries (tc0+2). */
756 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
758 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
759 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* x264_deblock_v_luma_intra_mmxext: strong luma filter built from two calls
 * to the v8 intra routine, the second starting 8 pixels further along. */
761 void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
763 x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
764 x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
/* Prototypes of the AltiVec luma deblocking routines installed by
 * x264_deblock_init when X264_CPU_ALTIVEC is set; their definitions are not
 * in the visible part of this file. */
770 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
771 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* x264_deblock_init: fill the deblocking function table pf.
 * All eight slots first receive the C implementations; slots are then
 * overridden according to the capability bits in cpu.
 * NOTE(review): the preprocessor guards and braces around the
 * architecture-specific sections are not visible in this listing. */
774 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
776 pf->deblock_v_luma = deblock_v_luma_c;
777 pf->deblock_h_luma = deblock_h_luma_c;
778 pf->deblock_v_chroma = deblock_v_chroma_c;
779 pf->deblock_h_chroma = deblock_h_chroma_c;
780 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
781 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
782 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
783 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
/* MMXEXT: the chroma and luma slots are replaced */
786 if( cpu&X264_CPU_MMXEXT )
788 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
789 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
790 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
791 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
793 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
794 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
795 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
796 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
/* SSE2 luma versions are used only when X264_CPU_STACK_MOD4 is not set */
798 if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
800 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
801 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
802 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
803 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
/* AltiVec: only the two normal luma slots are replaced */
809 if( cpu&X264_CPU_ALTIVEC )
811 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
812 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* x264_frame_cond_broadcast: record a new value of frame->i_lines_completed
 * and wake every thread waiting in x264_frame_cond_wait. The store and the
 * broadcast both happen with frame->mutex held. */
819 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
821 x264_pthread_mutex_lock( &frame->mutex );
822 frame->i_lines_completed = i_lines_completed;
823 x264_pthread_cond_broadcast( &frame->cv );
824 x264_pthread_mutex_unlock( &frame->mutex );
/* x264_frame_cond_wait: block until frame->i_lines_completed has reached at
 * least i_lines_completed. The condition is re-tested in a loop after every
 * wake-up, with frame->mutex held around the test. */
827 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
829 x264_pthread_mutex_lock( &frame->mutex );
830 while( frame->i_lines_completed < i_lines_completed )
831 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
832 x264_pthread_mutex_unlock( &frame->mutex );
/* x264_frame_push: operates on a NULL-terminated array of frame pointers.
 * The visible loop advances i to the first NULL slot.
 * NOTE(review): the store of `frame` into that slot is not visible in this
 * listing - presumably list[i] = frame; confirm. */
837 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
840 while( list[i] ) i++;
/* x264_frame_pop: operates on a NULL-terminated array of frame pointers.
 * The visible loop advances i to the last non-NULL slot. It reads list[i+1],
 * so callers must not pass an empty list unless a hidden line checks for it.
 * NOTE(review): the removal and return of list[i] are not visible in this
 * listing - confirm. */
844 x264_frame_t *x264_frame_pop( x264_frame_t **list )
849 while( list[i+1] ) i++;
/* x264_frame_unshift: operates on a NULL-terminated array of frame pointers.
 * The visible loop finds the index of the terminating NULL.
 * NOTE(review): the rest of the body is not visible; going by the name it
 * presumably shifts the entries up by one and stores `frame` at the front -
 * confirm. */
855 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
858 while( list[i] ) i++;
/* x264_frame_shift: take the first entry of a NULL-terminated array of frame
 * pointers. The first entry is saved, then the list is walked up to its
 * terminating NULL.
 * NOTE(review): the loop body and the return are not visible in this
 * listing; presumably each entry is moved down one slot and the saved frame
 * is returned - confirm. */
864 x264_frame_t *x264_frame_shift( x264_frame_t **list )
866 x264_frame_t *frame = list[0];
868 for( i = 0; list[i]; i++ )
/* x264_frame_push_unused: drop one reference to a frame. When the count
 * reaches zero the frame is appended to h->frames.unused for reuse.
 * The first assert rejects a frame whose count is already zero; the second
 * checks that the last slot of the unused array is still NULL, i.e. that the
 * list keeps its terminator. */
874 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
876 assert( frame->i_reference_count > 0 );
877 frame->i_reference_count--;
878 if( frame->i_reference_count == 0 )
879 x264_frame_push( h->frames.unused, frame );
880 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
883 x264_frame_t *x264_frame_pop_unused( x264_t *h )
886 if( h->frames.unused[0] )
887 frame = x264_frame_pop( h->frames.unused );
889 frame = x264_frame_new( h );
890 assert( frame->i_reference_count == 0 );
891 frame->i_reference_count = 1;
895 void x264_frame_sort( x264_frame_t **list, int b_dts )
900 for( i = 0; list[i+1]; i++ )
902 int dtype = list[i]->i_type - list[i+1]->i_type;
903 int dtime = list[i]->i_frame - list[i+1]->i_frame;
904 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
908 XCHG( x264_frame_t*, list[i], list[i+1] );