1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 *****************************************************************************/
/* Round x up to the next multiple of a (a must be a power of two). */
26 #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
/* Allocate and zero-initialize a new frame: pixel planes (+padding), half-pel
 * filtered planes, optional lowres planes, ESA integral buffer, per-MB
 * metadata (types/MVs/refs) and per-row ratecontrol arrays.
 * Returns NULL on allocation failure; CHECKED_MALLOC jumps to a fail path
 * (not visible in this listing) that releases partial allocations. */
28 x264_frame_t *x264_frame_new( x264_t *h )
30 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
33 int i_mb_count = h->mb.i_mb_count;
34 int i_stride, i_width, i_lines;
/* interlaced coding needs twice the vertical padding */
35 int i_padv = PADV << h->param.b_interlaced;
/* stride alignment follows the detected cacheline size */
37 int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
39 if( !frame ) return NULL;
41 memset( frame, 0, sizeof(x264_frame_t) );
43 /* allocate frame data (+64 for extra data for me) */
44 i_width = ALIGN( h->param.i_width, 16 );
45 i_stride = ALIGN( i_width + 2*PADH, align );
46 i_lines = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
/* plane 0 = luma; planes 1,2 = chroma at half resolution (>> !!i) */
49 for( i = 0; i < 3; i++ )
51 frame->i_stride[i] = i_stride >> !!i;
52 frame->i_width[i] = i_width >> !!i;
53 frame->i_lines[i] = i_lines >> !!i;
56 luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
/* chroma buffers are a quarter of the (padded) luma plane size */
57 for( i = 1; i < 3; i++ )
59 CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
60 frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
62 /* all 4 luma planes allocated together, since the cacheline split code
63 * requires them to be in-phase wrt cacheline alignment. */
64 CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
65 for( i = 0; i < 4; i++ )
66 frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
/* filtered[0] doubles as the full-pel luma plane */
67 frame->plane[0] = frame->filtered[0];
69 if( h->frames.b_have_lowres )
71 frame->i_width_lowres = frame->i_width[0]/2;
72 frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
73 frame->i_lines_lowres = frame->i_lines[0]/2;
75 luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
/* 4 lowres planes (full-pel + 3 half-pel) in one allocation */
77 CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
78 for( i = 0; i < 4; i++ )
79 frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
/* exhaustive search needs the summed-area (integral) image */
82 if( h->param.analyse.i_me_method >= X264_ME_ESA )
84 CHECKED_MALLOC( frame->buffer[3],
85 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
86 frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
90 frame->i_type = X264_TYPE_AUTO;
94 frame->i_frame_num = -1;
95 frame->i_lines_completed = -1;
97 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
98 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
99 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
100 CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
/* list-1 MVs/refs only exist when B-frames are enabled */
101 if( h->param.i_bframe )
103 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
104 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
109 frame->ref[1] = NULL;
112 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
113 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
114 for( i = 0; i < h->param.i_bframe + 2; i++ )
115 for( j = 0; j < h->param.i_bframe + 2; j++ )
116 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
/* adaptive quantization stores one QP offset per MB */
118 if( h->param.rc.i_aq_mode )
119 CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
121 x264_pthread_mutex_init( &frame->mutex, NULL );
122 x264_pthread_cond_init( &frame->cv, NULL );
/* fail path: release everything allocated so far */
127 x264_frame_delete( frame );
131 void x264_frame_delete( x264_frame_t *frame )
134 for( i = 0; i < 4; i++ )
135 x264_free( frame->buffer[i] );
136 for( i = 0; i < 4; i++ )
137 x264_free( frame->buffer_lowres[i] );
138 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
139 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
140 x264_free( frame->i_row_satds[i][j] );
141 x264_free( frame->i_row_bits );
142 x264_free( frame->i_row_qp );
143 x264_free( frame->mb_type );
144 x264_free( frame->mv[0] );
145 x264_free( frame->mv[1] );
146 x264_free( frame->ref[0] );
147 x264_free( frame->ref[1] );
148 x264_pthread_mutex_destroy( &frame->mutex );
149 x264_pthread_cond_destroy( &frame->cv );
/* Copy a user-supplied picture into an internal frame.
 * Only I420 and YV12 input is accepted; returns nonzero on bad CSP
 * (the return statements fall in lines elided from this listing). */
153 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
155 int i_csp = src->img.i_csp & X264_CSP_MASK;
157 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
159 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
163 dst->i_type = src->i_type;
164 dst->i_qpplus1 = src->i_qpplus1;
165 dst->i_pts = src->i_pts;
/* i^3 swaps planes 1 and 2: YV12 stores V before U, internally we use I420 order */
169 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
170 uint8_t *plane = src->img.plane[s];
171 int stride = src->img.i_stride[s];
172 int width = h->param.i_width >> !!i;
173 int height = h->param.i_height >> !!i;
/* VFLIP: start from the last row; stride presumably negated on an elided line — confirm */
174 if( src->img.i_csp & X264_CSP_VFLIP )
176 plane += (height-1)*stride;
179 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate a plane's edge pixels into the surrounding padding.
 * pix points at the top-left pixel of the valid region (padding lives at
 * negative offsets). Each row gets i_padh replicated columns on both sides;
 * i_padv whole (already horizontally padded) rows are then copied above and
 * below — but only when b_pad_top / b_pad_bottom request it, so a frame can
 * be padded incrementally row-band by row-band. The visible original ignored
 * those two flags and always padded both bands. */
static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
{
#define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
    int y;
    for( y = 0; y < i_height; y++ )
    {
        /* left band: replicate the first pixel of the row */
        memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
        /* right band: replicate the last pixel of the row */
        memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
    }
    /* upper band: copies of the first padded row */
    if( b_pad_top )
        for( y = 0; y < i_padv; y++ )
            memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
    /* lower band: copies of the last padded row */
    if( b_pad_bottom )
        for( y = 0; y < i_padv; y++ )
            memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
#undef PPIXEL
}
/* Pad the reconstructed planes around the rows finished so far (called per
 * MB row during encoding; b_end marks the last row). With MBAFF, field pairs
 * are padded with doubled stride, one call per field.
 * NOTE(review): b_start is declared on a line elided from this listing —
 * presumably derived from mb_y; confirm against upstream. */
212 if( mb_y & h->sh.b_mbaff )
214 for( i = 0; i < frame->i_plane; i++ )
216 int stride = frame->i_stride[i];
217 int width = 16*h->sps->i_mb_width >> !!i;
218 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
219 int padh = PADH >> !!i;
220 int padv = PADV >> !!i;
221 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
222 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
223 if( b_end && !b_start )
224 height += 4 >> (!!i + h->sh.b_mbaff);
/* interlaced: pad each field separately at twice the stride */
227 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
228 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
232 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad the three half-pel filtered luma planes (filtered[1..3]).
 * NOTE(review): padh/padv declarations are elided from this listing. */
237 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
239 /* during filtering, 8 extra pixels were filtered on each edge,
240 * but up to 3 of the horizontal ones may be wrong.
241 * we want to expand border from the last filtered pixel */
243 int stride = frame->i_stride[0];
244 int width = 16*h->sps->i_mb_width + 8;
245 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
249 for( i = 1; i < 4; i++ )
251 // buffer: 8 luma, to match the hpel filter
252 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
/* interlaced: pad each field separately at twice the stride */
255 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
256 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
260 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad all four lowres planes (full-pel + 3 half-pel), top and bottom included. */
265 void x264_frame_expand_border_lowres( x264_frame_t *frame )
268 for( i = 0; i < 4; i++ )
269 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad the input picture out to macroblock (mod-16) dimensions by replicating
 * the rightmost column and bottom row, so partial edge MBs encode cleanly.
 * NOTE(review): the trailing length arguments of the memset/memcpy calls fall
 * on lines elided from this listing. */
275 for( i = 0; i < frame->i_plane; i++ )
277 int i_subsample = i ? 1 : 0;
278 int i_width = h->param.i_width >> i_subsample;
279 int i_height = h->param.i_height >> i_subsample;
280 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
281 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* replicate the last real column across the right-side padding */
285 for( y = 0; y < i_height; y++ )
286 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
287 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
292 //FIXME interlace? or just let it pad using the wrong field
/* replicate the last real row downward */
293 for( y = i_height; y < i_height + i_pady; y++ )
294 memcpy( &frame->plane[i][y*frame->i_stride[i]],
295 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
302 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
303 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* Back up one MB row's nnz counts into buf, then rewrite them so that each
 * 8x8 block reads as "all nonzero" iff any of its 16-coeff groups is nonzero
 * (only when the MB used the 8x8 transform — see the elided transform[] test),
 * which is what the deblocker expects. restore_cavlc_nnz_row undoes this. */
304 static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
306 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
307 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
309 for( x=0; x<h->sps->i_mb_width; x++ )
/* save the original 16 luma nnz bytes for later restoration */
311 memcpy( buf+x, src+x, 16 );
/* each 0x0101 halfword marks one 4x4 group of an 8x8 block as "has coeffs" */
314 nnz = src[x][0] | src[x][1];
315 src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
316 nnz = src[x][2] | src[x][3];
317 src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
/* Restore the nnz counts saved by munge_cavlc_nnz_row. */
322 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
324 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
326 for( x=0; x<h->sps->i_mb_width; x++ )
327 memcpy( dst+x, buf+x, 16 );
/* Apply func (munge or restore) to the current MB row plus its neighbours,
 * since deblocking a row reads nnz from adjacent rows. The guards selecting
 * which neighbour rows exist are on lines elided from this listing. */
330 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
332 func( h, mb_y, buf );
334 func( h, mb_y-1, buf + h->sps->i_mb_width );
337 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
339 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
344 /* Deblocking filter */
/* H.264 deblocking thresholds, indexed by clipped QP + offset. Tables carry
 * 12 extra entries on each side so the +/-12 slice offsets never index out
 * of bounds; the (x)+12 macros below apply the bias. */
345 static const uint8_t i_alpha_table[52+12*2] =
347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
349 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
350 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
351 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
352 80, 90,101,113,127,144,162,182,203,226,
354 255,255,255,255,255,255,255,255,255,255,255,255,
/* beta: edge-gradient threshold table (spec Table 8-16) */
356 static const uint8_t i_beta_table[52+12*2] =
358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
360 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
361 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
362 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
363 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
365 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
/* tc0: clipping values indexed by [QP][bS]; bS==0 column is -1 (unused) */
367 static const int8_t i_tc0_table[52+12*2][4] =
369 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
370 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
371 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
372 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
373 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
374 {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
375 {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
376 {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
377 {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
378 {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
379 {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
380 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
381 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
/* accessors applying the +12 padding bias */
383 #define alpha_table(x) i_alpha_table[(x)+12]
384 #define beta_table(x) i_beta_table[(x)+12]
385 #define tc0_table(x) i_tc0_table[(x)+12]
/* Normal (inter) luma deblock: 4 groups of 4 samples along the edge.
 * xstride steps across the edge (p2..q2), ystride steps along it; tc0 holds
 * one clipping threshold per group. The per-group tc setup, skip-on-tc<=0
 * and pointer advance lines are elided from this listing. */
388 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
391 for( i = 0; i < 4; i++ )
398 for( d = 0; d < 4; d++ )
400 const int p2 = pix[-3*xstride];
401 const int p1 = pix[-2*xstride];
402 const int p0 = pix[-1*xstride];
403 const int q0 = pix[ 0*xstride];
404 const int q1 = pix[ 1*xstride];
405 const int q2 = pix[ 2*xstride];
/* spec filterSamplesFlag: edge active only below the alpha/beta thresholds */
407 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
/* strong p-side activity: also adjust p1 (tc grows accordingly in elided code) */
411 if( abs( p2 - p0 ) < beta )
413 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
416 if( abs( q2 - q0 ) < beta )
418 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* standard p0/q0 update clipped to +/-tc */
422 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
423 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
424 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* Filter with samples across the edge in successive rows (xstride = stride). */
430 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
432 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
/* Same filter with the stride roles swapped (samples across the edge adjacent in a row). */
434 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
436 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Normal (inter) chroma deblock: 4 groups of 2 samples; only p0/q0 are
 * modified. Skip-on-tc<=0 and the pointer advance lines are elided. */
439 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
442 for( i = 0; i < 4; i++ )
444 const int tc = tc0[i];
450 for( d = 0; d < 2; d++ )
452 const int p1 = pix[-2*xstride];
453 const int p0 = pix[-1*xstride];
454 const int q0 = pix[ 0*xstride];
455 const int q1 = pix[ 1*xstride];
457 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
459 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
460 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
461 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* Chroma wrappers mirroring the luma v/h pair: stride roles select direction. */
467 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
469 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
471 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
473 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Intra (strong) luma deblock over a 16-sample edge: when the edge is very
 * flat, up to 3 samples on each side are rewritten; otherwise only p0/q0. */
476 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
479 for( d = 0; d < 16; d++ )
481 const int p2 = pix[-3*xstride];
482 const int p1 = pix[-2*xstride];
483 const int p0 = pix[-1*xstride];
484 const int q0 = pix[ 0*xstride];
485 const int q1 = pix[ 1*xstride];
486 const int q2 = pix[ 2*xstride];
488 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
/* very low activity across the edge: strong filtering allowed */
490 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
492 if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
494 const int p3 = pix[-4*xstride];
495 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
496 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
497 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p-side not flat enough: only p0 */
500 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
501 if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
503 const int q3 = pix[3*xstride];
504 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
505 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
506 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
509 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak variant: p0/q0 only */
513 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
514 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Direction wrappers for the intra luma filter, as with the inter pair. */
520 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
522 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
524 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
526 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Intra (strong) chroma deblock over an 8-sample edge: p0/q0 only.
 * The pointer advance line is elided from this listing. */
529 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
532 for( d = 0; d < 8; d++ )
534 const int p1 = pix[-2*xstride];
535 const int p0 = pix[-1*xstride];
536 const int q0 = pix[ 0*xstride];
537 const int q1 = pix[ 1*xstride];
539 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
541 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
542 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Direction wrappers for the intra chroma filter. */
547 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
549 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
551 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
553 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one inter edge: look up alpha/beta from QP + slice offsets, build
 * the per-group tc array from bS (chroma adds 1 per spec), then run the
 * plugged-in filter on pix1 and, when non-NULL (the U/V pair), on pix2.
 * NOTE(review): the tc[] declaration and the if(pix2) guard before the
 * second call are on lines elided from this listing. */
556 static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
558 const int index_a = i_qp + h->sh.i_alpha_c0_offset;
559 const int alpha = alpha_table(index_a);
560 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
/* alpha or beta of 0 disables the filter entirely at this QP */
563 if( !alpha || !beta )
566 tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
567 tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
568 tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
569 tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
571 pf_inter( pix1, i_stride, alpha, beta, tc );
573 pf_inter( pix2, i_stride, alpha, beta, tc );
/* Intra-edge counterpart of deblock_edge: no tc (strong filter decides
 * per-sample), same alpha/beta gating and optional second plane. */
576 static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
578 const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
579 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
581 if( !alpha || !beta )
584 pf_intra( pix1, i_stride, alpha, beta );
586 pf_intra( pix2, i_stride, alpha, beta );
/* Deblock one macroblock row (two rows interleaved when MBAFF): for every MB,
 * compute boundary strength per 4-pixel edge segment and dispatch to the
 * luma/chroma, inter/intra filter kernels via the FILTER_DIR / DEBLOCK_STRENGTH
 * / DEBLOCK_DIR macros below. CAVLC+8x8dct streams munge nnz counts around the
 * call so the deblocker sees per-8x8 nnz. (Several brace/guard lines of the
 * macros are elided from this listing; no comments are inserted inside the
 * macro bodies to preserve their line continuations.) */
589 void x264_frame_deblock_row( x264_t *h, int mb_y )
591 const int s8x8 = 2 * h->mb.i_mb_stride;
592 const int s4x4 = 4 * h->mb.i_mb_stride;
593 const int b_interlaced = h->sh.b_mbaff;
594 const int mvy_limit = 4 >> b_interlaced;
595 const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
597 int stridey = h->fdec->i_stride[0];
598 int stride2y = stridey << b_interlaced;
599 int strideuv = h->fdec->i_stride[1];
600 int stride2uv = strideuv << b_interlaced;
602 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
603 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
605 for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
607 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
608 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
609 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
610 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
611 const int i_qp = h->mb.qp[mb_xy];
612 int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
613 uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
614 uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
615 uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x;
616 if( b_interlaced && (mb_y&1) )
623 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
625 if( i_qp <= qp_thresh )
628 #define FILTER_DIR(intra, i_dir)\
631 i_qpn= h->mb.qp[mbn_xy];\
635 deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
636 stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
637 h->loopf.deblock_h_luma##intra );\
641 int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
642 deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
643 stride2uv, bS, i_qpc, 1,\
644 h->loopf.deblock_h_chroma##intra );\
649 /* horizontal edge */\
650 deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
651 stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
652 h->loopf.deblock_v_luma##intra );\
656 int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
657 deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
658 stride2uv, bS, i_qpc, 1,\
659 h->loopf.deblock_v_chroma##intra );\
664 #define DEBLOCK_STRENGTH(i_dir)\
666 /* *** Get bS for each 4px for the current edge *** */\
667 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
668 *(uint32_t*)bS = 0x03030303;\
671 *(uint32_t*)bS = 0x00000000;\
672 for( i = 0; i < 4; i++ )\
674 int x = i_dir == 0 ? i_edge : i;\
675 int y = i_dir == 0 ? i : i_edge;\
676 int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
677 int yn = i_dir == 0 ? y : (y - 1)&0x03;\
678 if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
679 h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
683 /* FIXME: A given frame may occupy more than one position in\
684 * the reference list. So we should compare the frame numbers,\
685 * not the indices in the ref list.\
686 * No harm yet, as we don't generate that case.*/\
687 int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
688 int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
689 int i4p= mb_4x4+x+y*s4x4;\
690 int i4q= mbn_4x4+xn+yn*s4x4;\
691 for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
692 if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
693 abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
694 abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
704 /* i_dir == 0 -> vertical edge
705 * i_dir == 1 -> horizontal edge */
706 #define DEBLOCK_DIR(i_dir)\
708 int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
709 int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
710 DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
712 i_edge+= b_8x8_transform;\
715 mbn_xy = i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride;\
716 mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
717 mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
718 if( b_interlaced && i_dir == 1 )\
720 mbn_xy -= h->mb.i_mb_stride;\
721 mbn_8x8 -= 2 * s8x8;\
722 mbn_4x4 -= 4 * s4x4;\
724 else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
726 FILTER_DIR( _intra, i_dir );\
729 DEBLOCK_STRENGTH(i_dir);\
730 if( *(uint32_t*)bS )\
731 FILTER_DIR( , i_dir);\
733 i_edge += b_8x8_transform+1;\
738 for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
740 DEBLOCK_STRENGTH(i_dir);\
741 if( *(uint32_t*)bS )\
742 FILTER_DIR( , i_dir);\
750 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
751 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row );
/* Deblock the whole frame, one MB row (or MBAFF row pair) at a time. */
754 void x264_frame_deblock( x264_t *h )
757 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
758 x264_frame_deblock_row( h, mb_y );
/* Prototypes for the assembly-optimized deblock kernels (x86; the v8 variants
 * process 8 columns and are paired up by the wrappers below). */
762 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
763 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
764 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
765 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
767 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
768 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
769 void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
770 void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
772 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
773 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
774 void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
775 void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* Build a 16-wide luma filter from two 8-wide MMX calls; tc0 advances by 2
 * because each v8 call consumes two of the four per-group thresholds. */
777 static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
779 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
780 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
/* Same pairing for the intra (strong) filter. */
782 static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
784 x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
785 x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
/* Prototypes for the AltiVec (PowerPC) luma deblock kernels. */
791 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
792 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Fill the deblock function table: C fallbacks first, then overwrite with the
 * fastest implementation the CPU supports (the HAVE_MMX / ARCH guards fall on
 * lines elided from this listing). */
795 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
797 pf->deblock_v_luma = deblock_v_luma_c;
798 pf->deblock_h_luma = deblock_h_luma_c;
799 pf->deblock_v_chroma = deblock_v_chroma_c;
800 pf->deblock_h_chroma = deblock_h_chroma_c;
801 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
802 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
803 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
804 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
807 if( cpu&X264_CPU_MMXEXT )
809 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
810 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
811 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
812 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
814 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
815 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
816 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
817 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
/* the SSE2 kernels need 16-byte stack alignment */
819 if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
821 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
822 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
823 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
824 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
830 if( cpu&X264_CPU_ALTIVEC )
832 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
833 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish encoding progress (rows completed) and wake all threads waiting in
 * x264_frame_cond_wait; the mutex makes the update and broadcast atomic. */
840 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
842 x264_pthread_mutex_lock( &frame->mutex );
843 frame->i_lines_completed = i_lines_completed;
844 x264_pthread_cond_broadcast( &frame->cv );
845 x264_pthread_mutex_unlock( &frame->mutex );
/* Block until at least i_lines_completed rows of this frame are encoded;
 * the while loop re-checks the predicate after spurious wakeups. */
848 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
850 x264_pthread_mutex_lock( &frame->mutex );
851 while( frame->i_lines_completed < i_lines_completed )
852 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
853 x264_pthread_mutex_unlock( &frame->mutex );
/* Append frame to a NULL-terminated list (the store falls on an elided line). */
858 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
861 while( list[i] ) i++;
/* Remove and return the last frame of a NULL-terminated list
 * (the detach/return statements fall on elided lines). */
865 x264_frame_t *x264_frame_pop( x264_frame_t **list )
870 while( list[i+1] ) i++;
/* Insert frame at the front of the list, shifting existing entries up
 * (the shift/store statements fall on elided lines). */
876 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
879 while( list[i] ) i++;
/* Remove and return the first frame, sliding the rest down
 * (the slide/return statements fall on elided lines). */
885 x264_frame_t *x264_frame_shift( x264_frame_t **list )
887 x264_frame_t *frame = list[0];
889 for( i = 0; list[i]; i++ )
/* Drop one reference; when the count hits zero, return the frame to the
 * unused pool for reuse. The final assert guards against pool overflow. */
895 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
897 assert( frame->i_reference_count > 0 );
898 frame->i_reference_count--;
899 if( frame->i_reference_count == 0 )
900 x264_frame_push( h->frames.unused, frame );
901 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Get a frame to encode into: reuse one from the unused pool if available,
 * otherwise allocate a fresh one. The caller owns one reference.
 * NOTE(review): if x264_frame_new fails the assert below dereferences NULL —
 * consider a NULL check before touching frame. */
904 x264_frame_t *x264_frame_pop_unused( x264_t *h )
907 if( h->frames.unused[0] )
908 frame = x264_frame_pop( h->frames.unused );
910 frame = x264_frame_new( h );
911 assert( frame->i_reference_count == 0 );
912 frame->i_reference_count = 1;
913 frame->b_intra_calculated = 0;
/* Bubble-sort the frame list into decode order (b_dts: by type then frame
 * number) or display order; the outer repeat-until-sorted loop and the PTS
 * comparison branch continue past the end of this listing. */
917 void x264_frame_sort( x264_frame_t **list, int b_dts )
922 for( i = 0; list[i+1]; i++ )
924 int dtype = list[i]->i_type - list[i+1]->i_type;
925 int dtime = list[i]->i_frame - list[i+1]->i_frame;
926 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
930 XCHG( x264_frame_t*, list[i], list[i+1] );