1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *****************************************************************************/
/* Round x up to the next multiple of a (a must be a power of two). */
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
/* Allocate a frame and every per-frame buffer, sized from the encoder
 * parameters in h.  Returns NULL on allocation failure; CHECKED_MALLOC
 * jumps to an error path (not visible in this excerpt) which tears down
 * partial state via x264_frame_delete. */
x264_frame_t *x264_frame_new( x264_t *h )
    x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
    int i_mb_count = h->mb.i_mb_count;
    int i_stride, i_width, i_lines;
    /* Interlaced coding pads each field, doubling the vertical padding. */
    int i_padv = PADV << h->param.b_interlaced;
    /* Align strides to the detected cacheline size so SIMD row accesses
     * keep a consistent cacheline phase. */
    int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
    if( !frame ) return NULL;
    memset( frame, 0, sizeof(x264_frame_t) );
    /* allocate frame data (+64 for extra data for me) */
    i_width = ALIGN( h->param.i_width, 16 );
    i_stride = ALIGN( i_width + 2*PADH, align );
    /* Interlaced: round lines to a multiple of 32 (16 per field). */
    i_lines = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
    /* Plane 0 = luma; planes 1/2 = chroma at half resolution in each axis. */
    for( i = 0; i < 3; i++ )
        frame->i_stride[i] = i_stride >> !!i;
        frame->i_width[i] = i_width >> !!i;
        frame->i_lines[i] = i_lines >> !!i;
    luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
    /* Each chroma plane is a quarter of the padded luma plane size. */
    for( i = 1; i < 3; i++ )
        CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
        frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
    /* all 4 luma planes allocated together, since the cacheline split code
     * requires them to be in-phase wrt cacheline alignment. */
    if( h->param.analyse.i_subpel_refine )
        /* Full-pel plane plus the three half-pel interpolated planes. */
        CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
        for( i = 0; i < 4; i++ )
            frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
        frame->plane[0] = frame->filtered[0];
    /* No subpel refinement: only the full-pel luma plane is needed. */
        CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
        frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
    if( h->frames.b_have_lowres )
        frame->i_width_lowres = frame->i_width[0]/2;
        frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
        frame->i_lines_lowres = frame->i_lines[0]/2;
        luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
        /* 4 lowres planes allocated as one buffer (lookahead half-pel set). */
        CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
        for( i = 0; i < 4; i++ )
            frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
        /* Lookahead motion data, indexed [list][reference distance]. */
        for( j = 0; j <= !!h->param.i_bframe; j++ )
            for( i = 0; i <= h->param.i_bframe; i++ )
                CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
                memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
                CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
    if( h->param.analyse.i_me_method >= X264_ME_ESA )
        /* Integral (summed-area) image for exhaustive search; the size is
         * doubled when sub-8x8 ESA also needs its own table. */
        CHECKED_MALLOC( frame->buffer[3],
            frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
        frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
    frame->i_type = X264_TYPE_AUTO;
    frame->i_qpplus1 = 0;
    frame->i_frame_num = -1;
    /* -1 = no reconstruction progress yet (see x264_frame_cond_wait). */
    frame->i_lines_completed = -1;
    CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
    CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
    CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
    CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
    if( h->param.i_bframe )
        CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
        CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
    /* No B-frames: list-1 motion data is never used. */
        frame->ref[1] = NULL;
    /* Per-row statistics for rate control. */
    CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
    CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
    for( i = 0; i < h->param.i_bframe + 2; i++ )
        for( j = 0; j < h->param.i_bframe + 2; j++ )
            CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
    if( h->param.rc.i_aq_mode )
        CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
        if( h->frames.b_have_lowres )
            CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
    x264_pthread_mutex_init( &frame->mutex, NULL );
    x264_pthread_cond_init( &frame->cv, NULL );
    /* Allocation-failure path: release everything acquired so far. */
    x264_frame_delete( frame );
/* Free a frame and all buffers allocated by x264_frame_new.
 * NOTE(review): appears safe on partially-constructed frames only if
 * x264_free() tolerates NULL pointers — confirm in the allocator. */
void x264_frame_delete( x264_frame_t *frame )
    for( i = 0; i < 4; i++ )
        x264_free( frame->buffer[i] );
    for( i = 0; i < 4; i++ )
        x264_free( frame->buffer_lowres[i] );
    for( i = 0; i < X264_BFRAME_MAX+2; i++ )
        for( j = 0; j < X264_BFRAME_MAX+2; j++ )
            x264_free( frame->i_row_satds[i][j] );
    for( j = 0; j < 2; j++ )
        for( i = 0; i <= X264_BFRAME_MAX; i++ )
            x264_free( frame->lowres_mvs[j][i] );
            x264_free( frame->lowres_mv_costs[j][i] );
    x264_free( frame->f_qp_offset );
    x264_free( frame->i_inv_qscale_factor );
    x264_free( frame->i_intra_cost );
    x264_free( frame->i_row_bits );
    x264_free( frame->i_row_qp );
    x264_free( frame->mb_type );
    x264_free( frame->mv[0] );
    x264_free( frame->mv[1] );
    x264_free( frame->ref[0] );
    x264_free( frame->ref[1] );
    x264_pthread_mutex_destroy( &frame->mutex );
    x264_pthread_cond_destroy( &frame->cv );
/* Copy a user-supplied picture into an internal frame.  Only I420 and
 * YV12 input colorspaces are accepted; other CSPs log an error (the
 * error-return line is not visible in this excerpt). */
int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
    int i_csp = src->img.i_csp & X264_CSP_MASK;
    if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
        x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
    dst->i_type = src->i_type;
    dst->i_qpplus1 = src->i_qpplus1;
    dst->i_pts = src->i_pts;
        /* YV12 stores V before U: for chroma (i=1,2), i^3 swaps 1<->2. */
        int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
        uint8_t *plane = src->img.plane[s];
        int stride = src->img.i_stride[s];
        int width = h->param.i_width >> !!i;
        int height = h->param.i_height >> !!i;
        if( src->img.i_csp & X264_CSP_VFLIP )
            /* Flip vertically: start at the last row; the matching
             * stride negation is not visible in this excerpt. */
            plane += (height-1)*stride;
        h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate the edge pixels of a plane into its padding borders:
 * the left/right bands copy each row's first/last pixel, and (when
 * requested) the top/bottom bands duplicate the first/last padded row. */
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
{
    int y;
    uint8_t *row = pix;
    for( y = 0; y < i_height; y++, row += i_stride )
    {
        memset( row - i_padh, row[0], i_padh );          /* left band */
        memset( row + i_width, row[i_width-1], i_padh ); /* right band */
    }
    if( b_pad_top )
        for( y = 0; y < i_padv; y++ )
            memcpy( pix - i_padh - (y+1)*i_stride, pix - i_padh, i_width + 2*i_padh );
    if( b_pad_bottom )
        for( y = 0; y < i_padv; y++ )
            memcpy( pix - i_padh + (i_height+y)*i_stride, pix - i_padh + (i_height-1)*i_stride, i_width + 2*i_padh );
}
/* Pad the reconstructed planes of one macroblock row into the frame
 * borders so motion search/compensation may read outside the picture.
 * mb_y is in macroblock units; b_end marks the frame's last row.
 * NOTE(review): b_start's declaration is not visible in this excerpt;
 * presumably it is !mb_y — confirm against the full source. */
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
    /* With MBAFF, pad only at macroblock-pair boundaries (even rows). */
    if( mb_y & h->sh.b_mbaff )
    for( i = 0; i < frame->i_plane; i++ )
        int stride = frame->i_stride[i];
        int width = 16*h->sps->i_mb_width >> !!i;
        int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
        int padh = PADH >> !!i;
        int padv = PADV >> !!i;
        // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
        uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
        if( b_end && !b_start )
            height += 4 >> (!!i + h->sh.b_mbaff);
        /* Interlaced: pad each field separately (stride*2 walks one field). */
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
        /* Progressive: pad the plane as a whole. */
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad the three half-pel filtered luma planes for one macroblock row.
 * NOTE(review): padh/padv/b_start declarations are not visible in this
 * excerpt — confirm their values against the full source. */
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
    /* during filtering, 8 extra pixels were filtered on each edge,
     * but up to 3 of the horizontal ones may be wrong.
     * we want to expand border from the last filtered pixel */
    int stride = frame->i_stride[0];
    int width = 16*h->sps->i_mb_width + 8;
    int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
    /* filtered[0] is the full-pel plane, padded elsewhere; do 1..3 only. */
    for( i = 1; i < 4; i++ )
        // buffer: 8 luma, to match the hpel filter
        uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
        /* Interlaced: pad the two fields independently. */
            plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
            plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
        /* Progressive path. */
            plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
295 void x264_frame_expand_border_lowres( x264_frame_t *frame )
298 for( i = 0; i < 4; i++ )
299 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* When the input dimensions are not multiples of 16, fill the picture out
 * to whole macroblocks by replicating the rightmost column and bottom row. */
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
    for( i = 0; i < frame->i_plane; i++ )
        int i_subsample = i ? 1 : 0;
        int i_width = h->param.i_width >> i_subsample;
        int i_height = h->param.i_height >> i_subsample;
        /* Distance from the coded (mod-16) size to the displayed size. */
        int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
        int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
        /* Replicate the last pixel of each row across the horizontal pad
         * (the memset length argument is not visible in this excerpt). */
        for( y = 0; y < i_height; y++ )
            memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
                    frame->plane[i][y*frame->i_stride[i] + i_width - 1],
        //FIXME interlace? or just let it pad using the wrong field
        /* Replicate the bottom row down through the vertical pad
         * (the memcpy length argument is not visible in this excerpt). */
        for( y = i_height; y < i_height + i_pady; y++ )
            memcpy( &frame->plane[i][y*frame->i_stride[i]],
                    &frame->plane[i][(i_height-1)*frame->i_stride[i]],
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* Save one row of non_zero_count into buf, then rewrite it so the
 * deblocker sees per-8x8 granularity.  NOTE(review): the 8x8-transform
 * guard using `transform` is not visible in this excerpt — presumably
 * the rewrite applies only to 8x8-transform macroblocks; confirm. */
static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
    for( x=0; x<h->sps->i_mb_width; x++ )
        memcpy( buf+x, src+x, 16 );
        /* OR the four 4x4 nnz flags of each 8x8 block together and
         * broadcast a 0/1 byte back to all four positions. */
        nnz = src[x][0] | src[x][1];
        src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
        nnz = src[x][2] | src[x][3];
        src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
/* Restore the non_zero_count row previously saved by munge_cavlc_nnz_row. */
static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
    uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
    for( x=0; x<h->sps->i_mb_width; x++ )
        memcpy( dst+x, buf+x, 16 );
/* Apply func (munge or restore) to the current row and to the neighbour
 * rows that deblocking of row mb_y can touch.  NOTE(review): the boundary
 * guards selecting which neighbour rows exist are not visible in this
 * excerpt — confirm against the full source. */
static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
    func( h, mb_y, buf );
    func( h, mb_y-1, buf + h->sps->i_mb_width );
    func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
    func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
/* Deblocking filter */
/* Alpha (edge-activity) threshold per qp, padded with 12 entries at each
 * end so indices offset by slice alpha/beta offsets stay in range. */
static const uint8_t i_alpha_table[52+12*2] =
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
     7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
    25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
    80, 90,101,113,127,144,162,182,203,226,
    255,255,255,255,255,255,255,255,255,255,255,255,
/* Beta (gradient) threshold per qp, same padding scheme. */
static const uint8_t i_beta_table[52+12*2] =
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
     3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
     8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
    13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
    18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
/* Clipping values per qp, indexed by boundary strength bS (1..3);
 * bS==0 maps to the -1 sentinel meaning "no filtering". */
static const int8_t i_tc0_table[52+12*2][4] =
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
    {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
/* Accessors that fold in the +12 padding offset. */
#define alpha_table(x) i_alpha_table[(x)+12]
#define beta_table(x) i_beta_table[(x)+12]
#define tc0_table(x) i_tc0_table[(x)+12]
/* Normal (inter) luma edge filter over a 16-pixel edge, processed as four
 * 4-pixel segments each with its own tc0 clipping value.  xstride steps
 * across the edge, ystride along it.  NOTE(review): the tc0[i]<0 segment
 * skip and the per-pixel pointer advance are not visible in this excerpt. */
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
    for( i = 0; i < 4; i++ )
        for( d = 0; d < 4; d++ )
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];
            /* Filter only where the edge looks like blocking, not detail. */
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
                /* Strongish inner samples: also adjust p1 (and widen tc). */
                if( abs( p2 - p0 ) < beta )
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
                if( abs( q2 - q0 ) < beta )
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* Filter across a horizontal edge (p/q samples stacked vertically). */
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Filter across a vertical edge (p/q samples side by side in a row). */
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
    deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Normal (inter) chroma edge filter: 8-pixel edge as four 2-pixel
 * segments; chroma only ever modifies p0/q0.  NOTE(review): the tc<=0
 * segment skip and the pointer advance are not visible in this excerpt. */
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
    for( i = 0; i < 4; i++ )
        const int tc = tc0[i];
        for( d = 0; d < 2; d++ )
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
                int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* Filter across a horizontal chroma edge. */
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
/* Filter across a vertical chroma edge. */
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Strong (intra, bS=4) luma edge filter over all 16 pixels of the edge.
 * NOTE(review): the per-pixel pointer advance is not visible in this
 * excerpt. */
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
    for( d = 0; d < 16; d++ )
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            /* Small step across the edge: eligible for the strong filter. */
            if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
                if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
                    const int p3 = pix[-4*xstride];
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                /* Weak p-side fallback: p0 only. */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
                    const int q3 = pix[3*xstride];
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                /* Weak q-side fallback: q0 only. */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            /* Large step: weak filtering of p0/q0 only. */
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Strong filter across a horizontal luma edge. */
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
/* Strong filter across a vertical luma edge. */
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Strong (intra, bS=4) chroma edge filter over the 8-pixel edge; chroma
 * only rewrites p0/q0.  NOTE(review): the per-pixel pointer advance is
 * not visible in this excerpt. */
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
    for( d = 0; d < 8; d++ )
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Strong filter across a horizontal chroma edge. */
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
/* Strong filter across a vertical chroma edge. */
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one inter edge: derive alpha/beta/tc0 from the averaged qp and
 * the slice offsets, skip when the thresholds are zero, then run the
 * filter kernel — twice when pix2 supplies the second chroma plane.
 * NOTE(review): the tc[] declaration, the early return, and the pix2
 * NULL guard are not visible in this excerpt. */
static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
    const int index_a = i_qp + h->sh.i_alpha_c0_offset;
    const int alpha = alpha_table(index_a);
    const int beta = beta_table(i_qp + h->sh.i_beta_offset);
    if( !alpha || !beta )
    /* Chroma adds 1 to tc0, per the spec's tC derivation (b_chroma is 0/1). */
    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
    pf_inter( pix1, i_stride, alpha, beta, tc );
    pf_inter( pix2, i_stride, alpha, beta, tc );
/* Filter one intra (bS=4) edge; like deblock_edge but the strong filter
 * needs no tc0 clipping values.  NOTE(review): the early return and the
 * pix2 NULL guard are not visible in this excerpt. */
static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
    const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
    const int beta = beta_table(i_qp + h->sh.i_beta_offset);
    if( !alpha || !beta )
    pf_intra( pix1, i_stride, alpha, beta );
    pf_intra( pix2, i_stride, alpha, beta );
/* Deblock one row of macroblocks in the reconstructed frame: for each MB,
 * compute boundary strengths and filter its vertical then horizontal
 * edges.  With MBAFF, a macroblock pair is processed per column.
 * NOTE(review): several structural lines (braces, the MBAFF pixel-pointer
 * adjustment body, and the DEBLOCK_DIR invocations) are not visible in
 * this excerpt. */
void x264_frame_deblock_row( x264_t *h, int mb_y )
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
    const int b_interlaced = h->sh.b_mbaff;
    /* Interlaced fields have half the vertical MV resolution for this test. */
    const int mvy_limit = 4 >> b_interlaced;
    /* Below this qp, alpha/beta are guaranteed zero and the MB can be skipped. */
    const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
    const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
    int stridey = h->fdec->i_stride[0];
    int stride2y = stridey << b_interlaced;
    int strideuv = h->fdec->i_stride[1];
    int stride2uv = strideuv << b_interlaced;
    /* CAVLC + 8x8dct stores nnz at the wrong granularity for deblocking;
     * temporarily rewrite it (restored at the bottom of this function). */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
    for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
        const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
        const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
        const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
        const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
        const int i_qp = h->mb.qp[mb_xy];
        /* P_SKIP has no coefficients and uniform motion: only the MB's
         * left/top boundary edges can need filtering. */
        int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
        uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
        uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
        uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x;
        if( b_interlaced && (mb_y&1) )
        x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
        if( i_qp <= qp_thresh )
/* Filter every edge of one direction: i_dir==0 filters vertical edges
 * (h_luma/h_chroma kernels), i_dir==1 horizontal edges (v_* kernels). */
#define FILTER_DIR(intra, i_dir)\
            i_qpn= h->mb.qp[mbn_xy];\
                deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
                                     stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                                     h->loopf.deblock_h_luma##intra );\
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
                                         stride2uv, bS, i_qpc, 1,\
                                         h->loopf.deblock_h_chroma##intra );\
                /* horizontal edge */\
                deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
                                     stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
                                     h->loopf.deblock_v_luma##intra );\
                    int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
                    deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
                                         stride2uv, bS, i_qpc, 1,\
                                         h->loopf.deblock_v_chroma##intra );\
/* Compute the boundary strength bS for each 4-pixel segment of the edge. */
#define DEBLOCK_STRENGTH(i_dir)\
            /* *** Get bS for each 4px for the current edge *** */\
            if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                *(uint32_t*)bS = 0x03030303;\
                *(uint32_t*)bS = 0x00000000;\
                for( i = 0; i < 4; i++ )\
                    int x  = i_dir == 0 ? i_edge : i;\
                    int y  = i_dir == 0 ? i      : i_edge;\
                    int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
                    int yn = i_dir == 0 ? y : (y - 1)&0x03;\
                    if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
                        h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
                    else if(!(i_edge&no_sub8x8))\
                        if((i&no_sub8x8) && bS[i-1] != 2)\
                        /* FIXME: A given frame may occupy more than one position in\
                         * the reference list. So we should compare the frame numbers,\
                         * not the indices in the ref list.\
                         * No harm yet, as we don't generate that case.*/\
                        int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
                        int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
                        int i4p= mb_4x4+x+y*s4x4;\
                        int i4q= mbn_4x4+xn+yn*s4x4;\
                        if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
                            abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
                            abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
                           (h->sh.i_type == SLICE_TYPE_B &&\
                           (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
                            abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
                            abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
        /* i_dir == 0 -> vertical edge
         * i_dir == 1 -> horizontal edge */
/* Walk all edges of one direction in the current MB: the boundary edge
 * with the neighbour MB (intra-strength if either side is intra), then
 * the internal edges (stepped by 2 for 8x8 transform). */
#define DEBLOCK_DIR(i_dir)\
            int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
            int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
            DECLARE_ALIGNED_4( uint8_t bS[4] );  /* filtering strength */\
                i_edge+= b_8x8_transform;\
                mbn_xy  = i_dir == 0 ? mb_xy  - 1 : mb_xy - h->mb.i_mb_stride;\
                mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
                mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
                if( b_interlaced && i_dir == 1 )\
                    mbn_xy -= h->mb.i_mb_stride;\
                    mbn_8x8 -= 2 * s8x8;\
                    mbn_4x4 -= 4 * s4x4;\
                else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
                    FILTER_DIR( _intra, i_dir );\
                    DEBLOCK_STRENGTH(i_dir);\
                    if( *(uint32_t*)bS )\
                        FILTER_DIR( , i_dir);\
                i_edge += b_8x8_transform+1;\
            for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
                DEBLOCK_STRENGTH(i_dir);\
                if( *(uint32_t*)bS )\
                    FILTER_DIR( , i_dir);\
    /* Undo the temporary nnz rewrite done before the loop. */
    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
        munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
792 void x264_frame_deblock( x264_t *h )
795 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
796 x264_frame_deblock_row( h, mb_y );
/* Prototypes for assembly deblock kernels; the surrounding architecture
 * #ifdef guards are not visible in this excerpt. */
void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* The MMX kernel handles 8 pixels at a time; cover a 16-pixel luma edge
 * by calling it twice, advancing tc0 by two segments for the second half. */
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
    x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* Same 8+8 split for the intra (strong) luma filter. */
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
    x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
/* AltiVec (PPC) variants. */
void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblocking function table: C reference versions first, then
 * override with SIMD implementations selected by cpu flags.  The
 * architecture #ifdef guards are not visible in this excerpt. */
void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
    pf->deblock_v_luma = deblock_v_luma_c;
    pf->deblock_h_luma = deblock_h_luma_c;
    pf->deblock_v_chroma = deblock_v_chroma_c;
    pf->deblock_h_chroma = deblock_h_chroma_c;
    pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
    pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
    pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
    pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
    if( cpu&X264_CPU_MMXEXT )
        pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
        pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
        pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
        pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
        pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
        pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
        pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
        pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
        /* SSE2 luma is skipped when the stack may be misaligned mod 4. */
        if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
            pf->deblock_v_luma = x264_deblock_v_luma_sse2;
            pf->deblock_h_luma = x264_deblock_h_luma_sse2;
            pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
            pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
    if( cpu&X264_CPU_ALTIVEC )
        pf->deblock_v_luma = x264_deblock_v_luma_altivec;
        pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish reconstruction progress: record how many lines of this frame
 * are complete and wake every thread blocked in x264_frame_cond_wait.
 * The mutex orders the store with respect to the waiters' predicate check. */
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
    x264_pthread_mutex_lock( &frame->mutex );
    frame->i_lines_completed = i_lines_completed;
    x264_pthread_cond_broadcast( &frame->cv );
    x264_pthread_mutex_unlock( &frame->mutex );
/* Block until at least i_lines_completed lines of this frame have been
 * reconstructed.  Standard condvar loop: re-check the predicate after
 * every wakeup to tolerate spurious signals. */
void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
    x264_pthread_mutex_lock( &frame->mutex );
    while( frame->i_lines_completed < i_lines_completed )
        x264_pthread_cond_wait( &frame->cv, &frame->mutex );
    x264_pthread_mutex_unlock( &frame->mutex );
/* Append frame at the first free (NULL) slot of a NULL-terminated list.
 * NOTE(review): the index initialisation and the store into list[i] are
 * not visible in this excerpt. */
void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
    while( list[i] ) i++;
/* Remove and return the last frame of a NULL-terminated list.
 * NOTE(review): the index setup, slot clearing and return are not
 * visible in this excerpt. */
x264_frame_t *x264_frame_pop( x264_frame_t **list )
    while( list[i+1] ) i++;
/* Insert frame at the head of the list; the shift-back loop and the
 * store into list[0] are not visible in this excerpt. */
void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
    while( list[i] ) i++;
/* Remove and return the first frame, shifting the remaining entries
 * forward (the per-element move and the return are not visible in this
 * excerpt). */
x264_frame_t *x264_frame_shift( x264_frame_t **list )
    x264_frame_t *frame = list[0];
    for( i = 0; list[i]; i++ )
/* Drop one reference to a frame; when the count reaches zero, return the
 * frame to the unused pool for later reuse by x264_frame_pop_unused. */
void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
    assert( frame->i_reference_count > 0 );
    frame->i_reference_count--;
    if( frame->i_reference_count == 0 )
        x264_frame_push( h->frames.unused, frame );
    /* The pool array must stay NULL-terminated, i.e. never completely full. */
    assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Obtain a frame to encode into: reuse one from the unused pool when
 * available, otherwise allocate a fresh one.
 * NOTE(review): x264_frame_new can return NULL; the assert below would
 * then dereference NULL — confirm how callers handle OOM. */
x264_frame_t *x264_frame_pop_unused( x264_t *h )
    if( h->frames.unused[0] )
        frame = x264_frame_pop( h->frames.unused );
    /* Pool empty: allocate a new frame. */
        frame = x264_frame_new( h );
    assert( frame->i_reference_count == 0 );
    frame->i_reference_count = 1;
    frame->b_intra_calculated = 0;
/* Bubble-sort the frame list: by (type, frame number) for decode order
 * (b_dts) or by frame number alone for presentation order.
 * NOTE(review): the outer repeat-until-sorted loop and the PTS branch of
 * the `swap` expression are not visible in this excerpt. */
void x264_frame_sort( x264_frame_t **list, int b_dts )
    for( i = 0; list[i+1]; i++ )
        int dtype = list[i]->i_type - list[i+1]->i_type;
        int dtime = list[i]->i_frame - list[i+1]->i_frame;
        int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
            XCHG( x264_frame_t*, list[i], list[i+1] );