1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Round x up to the next multiple of a (a must be a power of two). */
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
/* Allocate and initialize a new encoder frame: padded luma/chroma planes,
 * the 4 half-pel-filtered luma planes, optional half-resolution planes for
 * lookahead, an integral image for ESA motion estimation, and per-MB
 * MV/ref/ratecontrol arrays.  Returns NULL on allocation failure.
 * NOTE(review): braces, some declarations (i, j, luma_plane_size) and the
 * CHECKED_MALLOC `fail:` cleanup path appear elided from this excerpt;
 * the visible statements are kept unchanged below. */
x264_frame_t *x264_frame_new( x264_t *h )
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
    int i_mb_count = h->mb.i_mb_count;
    int i_stride, i_width, i_lines;
    /* interlaced: each field gets full padding, so double it */
    int i_padv = PADV << h->param.b_interlaced;
    /* stride alignment follows the detected cacheline size */
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
    if( !frame ) return NULL;
    memset( frame, 0, sizeof(x264_frame_t) );
    /* allocate frame data (+64 for extra data for me) */
    i_width = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    i_lines = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
    /* plane 0 = luma; planes 1,2 = chroma at half resolution each way */
    for( i = 0; i < 3; i++ )
        frame->i_stride[i] = i_stride >> !!i;
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    /* each chroma plane is a quarter of the (padded) luma area */
    for( i = 1; i < 3; i++ )
        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
    CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
    for( i = 0; i < 4; i++ )
        frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
    frame->plane[0] = frame->filtered[0];
    if( h->frames.b_have_lowres )
        /* 4 half-resolution planes (one per hpel phase) for lookahead */
        frame->i_width_lowres = frame->i_width[0]/2;
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
        frame->i_lines_lowres = frame->i_lines[0]/2;
        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
        for( i = 0; i < 4; i++ )
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
    if( h->param.analyse.i_me_method >= X264_ME_ESA )
        /* summed-area table used by exhaustive-search SAD estimation */
        CHECKED_MALLOC( frame->buffer[3],
                        2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_frame_num = -1;
    frame->i_lines_completed = -1;
    /* per-macroblock analysis data */
    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
    if( h->param.i_bframe )
        /* list-1 motion data only needed when B-frames are enabled */
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
        frame->ref[1] = NULL;
    /* per-row ratecontrol statistics */
    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
    /* used by threaded encode to publish row-completion progress */
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );
    /* failure path: release everything allocated so far */
    x264_frame_delete( frame );
127 void x264_frame_delete( x264_frame_t *frame )
130 for( i = 0; i < 4; i++ )
131 x264_free( frame->buffer[i] );
132 for( i = 0; i < 4; i++ )
133 x264_free( frame->buffer_lowres[i] );
134 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
135 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
136 x264_free( frame->i_row_satds[i][j] );
137 x264_free( frame->i_row_bits );
138 x264_free( frame->i_row_qp );
139 x264_free( frame->mb_type );
140 x264_free( frame->mv[0] );
141 x264_free( frame->mv[1] );
142 x264_free( frame->ref[0] );
143 x264_free( frame->ref[1] );
144 x264_pthread_mutex_destroy( &frame->mutex );
145 x264_pthread_cond_destroy( &frame->cv );
149 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
151 int i_csp = src->img.i_csp & X264_CSP_MASK;
153 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
155 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
159 dst->i_type = src->i_type;
160 dst->i_qpplus1 = src->i_qpplus1;
161 dst->i_pts = src->i_pts;
165 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
166 uint8_t *plane = src->img.plane[s];
167 int stride = src->img.i_stride[s];
168 int width = h->param.i_width >> !!i;
169 int height = h->param.i_height >> !!i;
170 if( src->img.i_csp & X264_CSP_VFLIP )
172 plane += (height-1)*stride;
175 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
182 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
184 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
186 for( y = 0; y < i_height; y++ )
189 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
191 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
195 for( y = 0; y < i_padv; y++ )
196 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
199 for( y = 0; y < i_padv; y++ )
200 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
204 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
208 if( mb_y & h->sh.b_mbaff )
210 for( i = 0; i < frame->i_plane; i++ )
212 int stride = frame->i_stride[i];
213 int width = 16*h->sps->i_mb_width >> !!i;
214 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
215 int padh = PADH >> !!i;
216 int padv = PADV >> !!i;
217 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
218 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
219 if( b_end && !b_start )
220 height += 4 >> (!!i + h->sh.b_mbaff);
223 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
224 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
228 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
233 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
235 /* during filtering, 8 extra pixels were filtered on each edge,
236 * but up to 3 of the horizontal ones may be wrong.
237 we want to expand border from the last filtered pixel */
239 int stride = frame->i_stride[0];
240 int width = 16*h->sps->i_mb_width + 8;
241 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
245 for( i = 1; i < 4; i++ )
247 // buffer: 8 luma, to match the hpel filter
248 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
251 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
252 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
256 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
261 void x264_frame_expand_border_lowres( x264_frame_t *frame )
264 for( i = 0; i < 4; i++ )
265 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
268 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
271 for( i = 0; i < frame->i_plane; i++ )
273 int i_subsample = i ? 1 : 0;
274 int i_width = h->param.i_width >> i_subsample;
275 int i_height = h->param.i_height >> i_subsample;
276 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
277 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
281 for( y = 0; y < i_height; y++ )
282 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
283 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
288 //FIXME interlace? or just let it pad using the wrong field
289 for( y = i_height; y < i_height + i_pady; y++ )
290 memcpy( &frame->plane[i][y*frame->i_stride[i]],
291 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
298 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
299 * entropy coding, but per 64 coeffs for the purpose of deblocking */
300 void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
302 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
303 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
305 for( x=0; x<h->sps->i_mb_width; x++ )
307 memcpy( buf+x, src+x, 16 );
310 nnz = src[x][0] | src[x][1];
311 src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
312 nnz = src[x][2] | src[x][3];
313 src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
318 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
320 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
322 for( x=0; x<h->sps->i_mb_width; x++ )
323 memcpy( dst+x, buf+x, 16 );
326 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
328 func( h, mb_y, buf );
330 func( h, mb_y-1, buf + h->sps->i_mb_width );
333 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
335 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
/* Deblocking filter */
/* Alpha threshold per H.264 Table 8-16, indexed through alpha_table()
 * with a +12 bias: 12 clamp entries are prepended/appended so that
 * QP + alpha_c0_offset cannot index out of bounds.
 * NOTE(review): one interior row of this table appears elided here. */
static const uint8_t i_alpha_table[52+12*2] =
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
    7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
    255,255,255,255,255,255,255,255,255,255,255,255,
/* Beta threshold per H.264 Table 8-16, indexed through beta_table() with
 * the same +12 clamp bias as i_alpha_table.
 * NOTE(review): one interior row of this table appears elided here. */
static const uint8_t i_beta_table[52+12*2] =
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
    3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
    8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
/* tC0 clipping values per H.264 Table 8-17, indexed [biased QP][bS].
 * Column 0 (bS==0 would not be filtered) holds -1 as a sentinel; the
 * deblock functions skip a 4-pixel group when its tc0 is negative. */
static const int8_t i_tc0_table[52+12*2][4] =
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
/* Index the threshold tables with a +12 bias so that slightly negative
 * (QP + offset) indices land in the clamp entries instead of out of bounds. */
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x)  i_beta_table[(x)+12]
#define tc0_table(x)   i_tc0_table[(x)+12]
384 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
387 for( i = 0; i < 4; i++ )
394 for( d = 0; d < 4; d++ )
396 const int p2 = pix[-3*xstride];
397 const int p1 = pix[-2*xstride];
398 const int p0 = pix[-1*xstride];
399 const int q0 = pix[ 0*xstride];
400 const int q1 = pix[ 1*xstride];
401 const int q2 = pix[ 2*xstride];
403 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
407 if( abs( p2 - p0 ) < beta )
409 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
412 if( abs( q2 - q0 ) < beta )
414 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
418 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
419 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
420 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
426 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
428 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
430 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
432 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
435 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
438 for( i = 0; i < 4; i++ )
440 const int tc = tc0[i];
446 for( d = 0; d < 2; d++ )
448 const int p1 = pix[-2*xstride];
449 const int p0 = pix[-1*xstride];
450 const int q0 = pix[ 0*xstride];
451 const int q1 = pix[ 1*xstride];
453 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
455 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
456 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
457 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
463 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
465 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
467 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
469 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
472 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
475 for( d = 0; d < 16; d++ )
477 const int p2 = pix[-3*xstride];
478 const int p1 = pix[-2*xstride];
479 const int p0 = pix[-1*xstride];
480 const int q0 = pix[ 0*xstride];
481 const int q1 = pix[ 1*xstride];
482 const int q2 = pix[ 2*xstride];
484 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
486 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
488 if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
490 const int p3 = pix[-4*xstride];
491 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
492 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
493 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
496 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
497 if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
499 const int q3 = pix[3*xstride];
500 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
501 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
502 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
505 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
509 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
510 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
516 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
518 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
520 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
522 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
525 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
528 for( d = 0; d < 8; d++ )
530 const int p1 = pix[-2*xstride];
531 const int p0 = pix[-1*xstride];
532 const int q0 = pix[ 0*xstride];
533 const int q1 = pix[ 1*xstride];
535 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
537 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
538 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
543 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
545 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
547 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
549 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
552 static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
554 const int index_a = i_qp + h->sh.i_alpha_c0_offset;
555 const int alpha = alpha_table(index_a);
556 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
559 if( !alpha || !beta )
562 tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
563 tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
564 tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
565 tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
567 pf_inter( pix1, i_stride, alpha, beta, tc );
569 pf_inter( pix2, i_stride, alpha, beta, tc );
572 static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
574 const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
575 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
577 if( !alpha || !beta )
580 pf_intra( pix1, i_stride, alpha, beta );
582 pf_intra( pix2, i_stride, alpha, beta );
/* Deblock one macroblock row (an MB-pair row under MBAFF): for each MB,
 * compute per-edge boundary strengths and filter vertical then horizontal
 * edges of luma and chroma against the left/top neighbours, per H.264 §8.7.
 * NOTE(review): braces, the mb_x declaration, the interlaced pixel-rebase
 * statements, the `i_edge_end` fast path, several macro continuation lines
 * and the final DEBLOCK_DIR(0)/DEBLOCK_DIR(1) invocations appear elided
 * from this excerpt; visible code is kept unchanged below. */
void x264_frame_deblock_row( x264_t *h, int mb_y )
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
    const int b_interlaced = h->sh.b_mbaff;
    /* field MVs have half the vertical resolution, so halve the threshold */
    const int mvy_limit = 4 >> b_interlaced;
    /* QPs at or below this cannot yield a nonzero alpha/beta */
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
    int stridey = h->fdec->i_stride[0];
    int stride2y = stridey << b_interlaced;
    int strideuv = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;
    /* CAVLC+8x8dct stores nnz per 16 coeffs; deblocking wants per 64 */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
    /* MBAFF: visit top then bottom MB of each pair before advancing mb_x */
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
        const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
        const int i_qp = h->mb.qp[mb_xy];
        /* P_SKIP MBs only need their left/top boundary edges filtered */
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x;
        /* bottom field of a pair: rebase the pixel pointers (body elided) */
        if( b_interlaced && (mb_y&1) )
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
        if( i_qp <= qp_thresh )
        /* Filter the 4 (or fewer) edges of one direction.
         * i_dir == 0: vertical edges; i_dir == 1: horizontal edges. */
        #define FILTER_DIR(intra, i_dir)\
                i_qpn= h->mb.qp[mbn_xy];\
                    deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                                  stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                                  h->loopf.deblock_h_luma##intra );\
                        int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                        deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                      stride2uv, bS, i_qpc, 1,\
                                      h->loopf.deblock_h_chroma##intra );\
                    /* horizontal edge */\
                    deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                                  stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                                  h->loopf.deblock_v_luma##intra );\
                        int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                        deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                      stride2uv, bS, i_qpc, 1,\
                                      h->loopf.deblock_v_chroma##intra );\
        /* Compute the 4 boundary-strength values of one edge: 3 for intra
         * MBs, else 2 on nonzero coefficients, 1 on ref/MV mismatch. */
        #define DEBLOCK_STRENGTH(i_dir)\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                        /* FIXME: A given frame may occupy more than one position in\
                         * the reference list. So we should compare the frame numbers,\
                         * not the indices in the ref list.\
                         * No harm yet, as we don't generate that case.*/\
                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                        int i4p= mb_4x4+x+y*s4x4;\
                        int i4q= mbn_4x4+xn+yn*s4x4;\
                        for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
                            if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
                                abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
                                abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge */
        #define DEBLOCK_DIR(i_dir)\
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
            int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
            i_edge+= b_8x8_transform;\
            /* neighbour MB indices for the boundary (edge 0) pass */\
            mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
            mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
            mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
            if( b_interlaced && i_dir == 1 )\
                mbn_xy -= h->mb.i_mb_stride;\
                mbn_8x8 -= 2 * s8x8;\
                mbn_4x4 -= 4 * s4x4;\
            else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                /* intra boundary: fixed strength, strong filter */\
                FILTER_DIR( _intra, i_dir );\
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
            i_edge += b_8x8_transform+1;\
            /* interior edges (stride 2 with 8x8 transform) */\
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
    /* undo the nnz munging so entropy coding sees the real values */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
750 void x264_frame_deblock( x264_t *h )
753 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
754 x264_frame_deblock_row( h, mb_y );
/* Prototypes for the x86 assembly deblock implementations (MMXEXT/SSE2).
 * The 8-wide MMXEXT luma kernels (v8) are paired up by the C wrappers
 * below to cover a full 16-pixel edge.
 * NOTE(review): the #ifdef HAVE_MMX / arch guards appear elided here. */
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
773 void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
775 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
776 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
778 void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
780 x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
781 x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
/* AltiVec (PPC) luma deblock prototypes.
 * NOTE(review): the surrounding #ifdef ARCH_PPC guard appears elided. */
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblock function table: C reference implementations first,
 * then override with MMXEXT/SSE2 (x86) and AltiVec (PPC) versions as the
 * cpu flag word allows.  SSE2 luma is skipped when the stack may be only
 * 4-byte aligned (X264_CPU_STACK_MOD4).
 * NOTE(review): the #ifdef HAVE_MMX / ARCH_PPC guards and some closing
 * braces appear elided from this excerpt. */
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
    if( cpu&X264_CPU_MMXEXT )
        pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
        pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
        pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
        pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
        if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
            pf->deblock_v_luma = x264_deblock_v_luma_sse2;
            pf->deblock_h_luma = x264_deblock_h_luma_sse2;
            pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
            pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
    if( cpu&X264_CPU_ALTIVEC )
        pf->deblock_v_luma = x264_deblock_v_luma_altivec;
        pf->deblock_h_luma = x264_deblock_h_luma_altivec;
836 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
838 x264_pthread_mutex_lock( &frame->mutex );
839 frame->i_lines_completed = i_lines_completed;
840 x264_pthread_cond_broadcast( &frame->cv );
841 x264_pthread_mutex_unlock( &frame->mutex );
844 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
846 x264_pthread_mutex_lock( &frame->mutex );
847 while( frame->i_lines_completed < i_lines_completed )
848 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
849 x264_pthread_mutex_unlock( &frame->mutex );
854 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
857 while( list[i] ) i++;
861 x264_frame_t *x264_frame_pop( x264_frame_t **list )
866 while( list[i+1] ) i++;
872 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
875 while( list[i] ) i++;
881 x264_frame_t *x264_frame_shift( x264_frame_t **list )
883 x264_frame_t *frame = list[0];
885 for( i = 0; list[i]; i++ )
891 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
893 assert( frame->i_reference_count > 0 );
894 frame->i_reference_count--;
895 if( frame->i_reference_count == 0 )
896 x264_frame_push( h->frames.unused, frame );
897 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
900 x264_frame_t *x264_frame_pop_unused( x264_t *h )
903 if( h->frames.unused[0] )
904 frame = x264_frame_pop( h->frames.unused );
906 frame = x264_frame_new( h );
907 assert( frame->i_reference_count == 0 );
908 frame->i_reference_count = 1;
912 void x264_frame_sort( x264_frame_t **list, int b_dts )
917 for( i = 0; list[i+1]; i++ )
919 int dtype = list[i]->i_type - list[i+1]->i_type;
920 int dtime = list[i]->i_frame - list[i+1]->i_frame;
921 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
925 XCHG( x264_frame_t*, list[i], list[i+1] );