1 /*****************************************************************************
2 * frame.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003-2008 x264 project
6 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 * Loren Merritt <lorenm@u.washington.edu>
8 * Fiona Glaser <fiona@x264.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 *****************************************************************************/
/* Round x up to the next multiple of a; a must be a power of two. */
27 #define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
/* Allocate a new frame and every buffer hanging off it: padded pixel
 * planes, half-pel filtered planes, lowres (lookahead) planes, MV/ref/
 * cost arrays and per-row rate-control stats.  Returns NULL on failure;
 * CHECKED_MALLOC's error path (fail label elided in this view) frees
 * the partially built frame via x264_frame_delete. */
29 x264_frame_t *x264_frame_new( x264_t *h )
31 x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
34 int i_mb_count = h->mb.i_mb_count;
35 int i_stride, i_width, i_lines;
/* interlaced coding needs twice the vertical padding */
36 int i_padv = PADV << h->param.b_interlaced;
38 int chroma_plane_size;
/* strides rounded up to the CPU cacheline size so the cacheline-split
 * motion-compensation code stays in phase */
39 int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
41 if( !frame ) return NULL;
43 memset( frame, 0, sizeof(x264_frame_t) );
45 /* allocate frame data (+64 for extra data for me) */
46 i_width = ALIGN( h->param.i_width, 16 );
47 i_stride = ALIGN( i_width + 2*PADH, align );
48 i_lines = ALIGN( h->param.i_height, 16<<h->param.b_interlaced );
/* plane 0 is luma; planes 1/2 are chroma at half resolution (>> !!i) */
51 for( i = 0; i < 3; i++ )
53 frame->i_stride[i] = ALIGN( i_stride >> !!i, align );
54 frame->i_width[i] = i_width >> !!i;
55 frame->i_lines[i] = i_lines >> !!i;
/* plane sizes include i_padv rows of padding above and below */
58 luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
59 chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
60 for( i = 1; i < 3; i++ )
62 CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
/* the plane pointer is offset past the top and left padding */
63 frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
65 /* all 4 luma planes allocated together, since the cacheline split code
66 * requires them to be in-phase wrt cacheline alignment. */
67 if( h->param.analyse.i_subpel_refine )
69 CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
70 for( i = 0; i < 4; i++ )
71 frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
72 frame->plane[0] = frame->filtered[0];
/* no subpel refinement: a single full-pel luma plane suffices */
76 CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
77 frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
/* half-resolution planes plus MV/cost arrays used by the lookahead */
80 if( h->frames.b_have_lowres )
82 frame->i_width_lowres = frame->i_width[0]/2;
83 frame->i_stride_lowres = ALIGN( frame->i_width_lowres + 2*PADH, align );
84 frame->i_lines_lowres = frame->i_lines[0]/2;
86 luma_plane_size = frame->i_stride_lowres * ( frame->i_lines[0]/2 + 2*i_padv );
88 CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
89 for( i = 0; i < 4; i++ )
90 frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
/* j indexes the prediction direction, i the reference distance */
92 for( j = 0; j <= !!h->param.i_bframe; j++ )
93 for( i = 0; i <= h->param.i_bframe; i++ )
95 CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
96 memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
97 CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
/* integral-image buffer for exhaustive (ESA) motion search; size is
 * doubled when sub-8x8 ESA data is kept as well */
101 if( h->param.analyse.i_me_method >= X264_ME_ESA )
103 CHECKED_MALLOC( frame->buffer[3],
104 frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
105 frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
109 frame->i_type = X264_TYPE_AUTO;
110 frame->i_qpplus1 = 0;
113 frame->i_frame_num = -1;
114 frame->i_lines_completed = -1;
/* per-macroblock analysis results */
116 CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
117 CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
118 CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
119 CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
/* list-1 data only exists when B-frames are enabled */
120 if( h->param.i_bframe )
122 CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
123 CHECKED_MALLOC( frame->ref[1], 4 * i_mb_count * sizeof(int8_t) );
/* no B-frames: list-1 arrays stay unallocated */
128 frame->ref[1] = NULL;
/* per-row stats for rate control (one entry per 16-pixel mb row) */
131 CHECKED_MALLOC( frame->i_row_bits, i_lines/16 * sizeof(int) );
132 CHECKED_MALLOC( frame->i_row_qp, i_lines/16 * sizeof(int) );
133 for( i = 0; i < h->param.i_bframe + 2; i++ )
134 for( j = 0; j < h->param.i_bframe + 2; j++ )
135 CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
/* adaptive-quant offsets, plus inverse qscale for lookahead AQ */
137 if( h->param.rc.i_aq_mode )
139 CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
140 if( h->frames.b_have_lowres )
141 CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
/* synchronization used by x264_frame_cond_wait/cond_broadcast */
144 x264_pthread_mutex_init( &frame->mutex, NULL );
145 x264_pthread_cond_init( &frame->cv, NULL );
/* allocation-failure cleanup (CHECKED_MALLOC's target label elided) */
150 x264_frame_delete( frame );
/* Free a frame and all of its buffers.  Buffers that were never
 * allocated are NULL from the initial memset in x264_frame_new;
 * NOTE(review): this relies on x264_free accepting NULL — confirm. */
154 void x264_frame_delete( x264_frame_t *frame )
157 for( i = 0; i < 4; i++ )
158 x264_free( frame->buffer[i] );
159 for( i = 0; i < 4; i++ )
160 x264_free( frame->buffer_lowres[i] );
/* loop bounds use the compile-time maxima, not i_bframe; entries past
 * what x264_frame_new allocated are NULL */
161 for( i = 0; i < X264_BFRAME_MAX+2; i++ )
162 for( j = 0; j < X264_BFRAME_MAX+2; j++ )
163 x264_free( frame->i_row_satds[i][j] );
164 for( j = 0; j < 2; j++ )
165 for( i = 0; i <= X264_BFRAME_MAX; i++ )
167 x264_free( frame->lowres_mvs[j][i] );
168 x264_free( frame->lowres_mv_costs[j][i] );
170 x264_free( frame->f_qp_offset );
171 x264_free( frame->i_inv_qscale_factor );
172 x264_free( frame->i_intra_cost );
173 x264_free( frame->i_row_bits );
174 x264_free( frame->i_row_qp );
175 x264_free( frame->mb_type );
176 x264_free( frame->mv[0] );
177 x264_free( frame->mv[1] );
178 x264_free( frame->ref[0] );
179 x264_free( frame->ref[1] );
180 x264_pthread_mutex_destroy( &frame->mutex );
181 x264_pthread_cond_destroy( &frame->cv );
/* Import an application-supplied picture into an encoder frame.
 * Only I420/YV12 input is accepted; each plane is copied into the
 * frame's padded layout, with optional vertical flip. */
185 int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
187 int i_csp = src->img.i_csp & X264_CSP_MASK;
189 if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
191 x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
/* carry per-picture metadata across */
195 dst->i_type = src->i_type;
196 dst->i_qpplus1 = src->i_qpplus1;
197 dst->i_pts = src->i_pts;
/* YV12 stores V before U, so swap the chroma planes: i^3 maps 1<->2
 * (the plane-loop header is elided in this view) */
201 int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
202 uint8_t *plane = src->img.plane[s];
203 int stride = src->img.i_stride[s];
204 int width = h->param.i_width >> !!i;
205 int height = h->param.i_height >> !!i;
/* VFLIP: start at the last row; presumably the stride is negated in
 * the elided body — confirm against the full source */
206 if( src->img.i_csp & X264_CSP_VFLIP )
208 plane += (height-1)*stride;
211 h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
/* Replicate a plane's edge pixels into the surrounding padding:
 * each row's first/last pixel fills the left/right pads, then the
 * whole first/last padded row fills the top/bottom pads (those two
 * loops are guarded by b_pad_top / b_pad_bottom in lines elided
 * from this view). */
218 static void plane_expand_border( uint8_t *pix, int i_stride, int i_width, int i_height, int i_padh, int i_padv, int b_pad_top, int b_pad_bottom )
220 #define PPIXEL(x, y) ( pix + (x) + (y)*i_stride )
222 for( y = 0; y < i_height; y++ )
/* left pad <- first pixel of the row */
225 memset( PPIXEL(-i_padh, y), PPIXEL(0, y)[0], i_padh );
/* right pad <- last pixel of the row */
227 memset( PPIXEL(i_width, y), PPIXEL(i_width-1, y)[0], i_padh );
/* duplicate the top row (including its horizontal pads) upwards */
231 for( y = 0; y < i_padv; y++ )
232 memcpy( PPIXEL(-i_padh, -y-1), PPIXEL(-i_padh, 0), i_width+2*i_padh );
/* duplicate the bottom row downwards */
235 for( y = 0; y < i_padv; y++ )
236 memcpy( PPIXEL(-i_padh, i_height+y), PPIXEL(-i_padh, i_height-1), i_width+2*i_padh );
/* Pad the reconstructed rows starting at mb row mb_y on every plane.
 * For MBAFF the two fields are padded separately at doubled stride;
 * NOTE(review): the body of the odd-row check below is elided in this
 * view — presumably it returns so row pairs are handled together. */
240 void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
244 if( mb_y & h->sh.b_mbaff )
246 for( i = 0; i < frame->i_plane; i++ )
248 int stride = frame->i_stride[i];
249 int width = 16*h->sps->i_mb_width >> !!i;
250 int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
251 int padh = PADH >> !!i;
252 int padv = PADV >> !!i;
253 // buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
254 uint8_t *pix = frame->plane[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
/* the final call also covers the rows borrowed from the previous one */
255 if( b_end && !b_start )
256 height += 4 >> (!!i + h->sh.b_mbaff);
/* interlaced: pad each field separately at twice the stride */
259 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
260 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
264 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
/* Pad the half-pel filtered luma planes for the rows starting at mb
 * row mb_y, beginning at the last pixel the hpel filter produced. */
269 void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
271 /* during filtering, 8 extra pixels were filtered on each edge,
272 * but up to 3 of the horizontal ones may be wrong.
273 we want to expand border from the last filtered pixel */
275 int stride = frame->i_stride[0];
276 int width = 16*h->sps->i_mb_width + 8;
277 int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
/* planes 1..3 are the hpel-filtered copies; plane 0 (full-pel) is
 * padded by x264_frame_expand_border */
281 for( i = 1; i < 4; i++ )
283 // buffer: 8 luma, to match the hpel filter
284 uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
/* interlaced: each field is padded separately at doubled stride */
287 plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
288 plane_expand_border( pix+stride, stride*2, width, height, padh, padv, b_start, b_end );
292 plane_expand_border( pix, stride, width, height, padh, padv, b_start, b_end );
297 void x264_frame_expand_border_lowres( x264_frame_t *frame )
300 for( i = 0; i < 4; i++ )
301 plane_expand_border( frame->lowres[i], frame->i_stride_lowres, frame->i_stride_lowres - 2*PADH, frame->i_lines_lowres, PADH, PADV, 1, 1 );
/* Pad the picture out to whole macroblocks by replicating the last
 * real column rightwards and the last real row downwards, so analysis
 * can always read full 16x16 (8x8 chroma) blocks. */
304 void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
307 for( i = 0; i < frame->i_plane; i++ )
309 int i_subsample = i ? 1 : 0;
310 int i_width = h->param.i_width >> i_subsample;
311 int i_height = h->param.i_height >> i_subsample;
/* distance from the real picture edge to the mb-aligned edge */
312 int i_padx = ( h->sps->i_mb_width * 16 - h->param.i_width ) >> i_subsample;
313 int i_pady = ( h->sps->i_mb_height * 16 - h->param.i_height ) >> i_subsample;
/* replicate the last column rightwards (the memset length argument,
 * presumably i_padx, is elided in this view) */
317 for( y = 0; y < i_height; y++ )
318 memset( &frame->plane[i][y*frame->i_stride[i] + i_width],
319 frame->plane[i][y*frame->i_stride[i] + i_width - 1],
324 //FIXME interlace? or just let it pad using the wrong field
325 for( y = i_height; y < i_height + i_pady; y++ )
326 memcpy( &frame->plane[i][y*frame->i_stride[i]],
327 &frame->plane[i][(i_height-1)*frame->i_stride[i]],
334 /* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
335 * entropy coding, but per 64 coeffs for the purpose of deblocking */
/* Back up the per-4x4 nnz flags of each mb in row mb_y into buf, then
 * spread each 8x8 block's nnz to all four of its 4x4 positions so the
 * deblocker sees per-8x8 granularity.  NOTE(review): the check that
 * only 8x8-transform mbs (see `transform`) are munged is elided here. */
336 static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
338 uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
339 int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
341 for( x=0; x<h->sps->i_mb_width; x++ )
/* save the 16 luma nnz bytes so restore_cavlc_nnz_row can undo this */
343 memcpy( buf+x, src+x, 16 );
/* each OR covers one 8x8 block (two words of 4x4 flags); write 0x01
 * into every byte of the half that had any nonzero coefficients */
346 nnz = src[x][0] | src[x][1];
347 src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
348 nnz = src[x][2] | src[x][3];
349 src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
354 static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
356 uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
358 for( x=0; x<h->sps->i_mb_width; x++ )
359 memcpy( dst+x, buf+x, 16 );
/* Apply func (munge or restore) to the current mb row and the
 * neighbouring rows the deblocker reads, each with its own slice of
 * the backup buffer.  NOTE(review): the guards selecting which
 * neighbour rows exist (frame edges, MBAFF) are elided in this view. */
362 static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
364 func( h, mb_y, buf );
366 func( h, mb_y-1, buf + h->sps->i_mb_width );
369 func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
371 func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
376 /* Deblocking filter */
/* Per-QP alpha (edge activity) thresholds from the H.264 spec.  The 12
 * extra entries on each side absorb indices pushed out of [0,51] by the
 * slice alpha/beta offsets; access goes through the *_table() macros
 * below, which bias the index by +12. */
377 static const uint8_t i_alpha_table[52+12*2] =
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
382 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
383 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
384 80, 90,101,113,127,144,162,182,203,226,
386 255,255,255,255,255,255,255,255,255,255,255,255,
/* Per-QP beta (gradient) thresholds, same layout as above. */
388 static const uint8_t i_beta_table[52+12*2] =
390 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
391 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
392 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
393 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
394 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
395 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
397 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
/* Per-QP clipping strengths tc0, indexed by [qp+12][bS]; bS==0 maps to
 * -1 meaning "no filtering" for that segment. */
399 static const int8_t i_tc0_table[52+12*2][4] =
401 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
402 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
403 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
404 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
405 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
406 {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
407 {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
408 {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
409 {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
410 {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
411 {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
412 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
413 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
/* bias by +12 so offset-adjusted qp may range over [-12, 51+12] */
415 #define alpha_table(x) i_alpha_table[(x)+12]
416 #define beta_table(x) i_beta_table[(x)+12]
417 #define tc0_table(x) i_tc0_table[(x)+12]
/* Normal (bS<4) luma deblocking of one 16-pixel edge: 4 segments of 4
 * pixels, each with its own tc0 strength.  xstride steps across the
 * edge, ystride along it.  (Segment-skip for tc0<0 and the per-segment
 * tc adjustment are in lines elided from this view.) */
420 static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
423 for( i = 0; i < 4; i++ )
430 for( d = 0; d < 4; d++ )
/* three samples on each side of the edge */
432 const int p2 = pix[-3*xstride];
433 const int p1 = pix[-2*xstride];
434 const int p0 = pix[-1*xstride];
435 const int q0 = pix[ 0*xstride];
436 const int q1 = pix[ 1*xstride];
437 const int q2 = pix[ 2*xstride];
/* filter only sufficiently flat, low-gradient edges */
439 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
/* flat p side: also adjust p1 */
443 if( abs( p2 - p0 ) < beta )
445 pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
/* flat q side: also adjust q1 */
448 if( abs( q2 - q0 ) < beta )
450 pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
/* core tap, clipped to +/-tc */
454 delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
455 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
456 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* v = horizontal edge, filtered vertically (xstride = row stride);
 * h = vertical edge, filtered horizontally (xstride = 1). */
462 static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
464 deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
466 static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
468 deblock_luma_c( pix, 1, stride, alpha, beta, tc0 );
/* Normal (bS<4) chroma deblocking of one 8-pixel edge: 4 segments of
 * 2 pixels, one tc0 each; only p0/q0 are modified.  (The tc<=0
 * segment-skip is in lines elided from this view.) */
471 static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
474 for( i = 0; i < 4; i++ )
476 const int tc = tc0[i];
482 for( d = 0; d < 2; d++ )
484 const int p1 = pix[-2*xstride];
485 const int p0 = pix[-1*xstride];
486 const int q0 = pix[ 0*xstride];
487 const int q1 = pix[ 1*xstride];
/* same flatness test as luma */
489 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
491 int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
492 pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
493 pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
/* Direction wrappers, same convention as the luma pair above. */
499 static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
501 deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
503 static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
505 deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
/* Strong (bS==4, intra) luma deblocking of one 16-pixel edge:
 * no tc clipping; very flat edges get the 4/5-tap strong filters. */
508 static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
511 for( d = 0; d < 16; d++ )
513 const int p2 = pix[-3*xstride];
514 const int p1 = pix[-2*xstride];
515 const int p0 = pix[-1*xstride];
516 const int q0 = pix[ 0*xstride];
517 const int q1 = pix[ 1*xstride];
518 const int q2 = pix[ 2*xstride];
520 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
/* very low activity across the edge: eligible for strong filtering */
522 if(abs( p0 - q0 ) < ((alpha >> 2) + 2) )
524 if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
526 const int p3 = pix[-4*xstride];
527 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
528 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
529 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
/* p side not flat: weak 3-tap on p0 only */
532 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
533 if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
535 const int q3 = pix[3*xstride];
536 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
537 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
538 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
/* q side not flat: weak 3-tap on q0 only */
541 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge not flat enough for strong filtering: weak filter both sides */
545 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
546 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Direction wrappers for the strong luma filter. */
552 static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
554 deblock_luma_intra_c( pix, stride, 1, alpha, beta );
556 static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
558 deblock_luma_intra_c( pix, 1, stride, alpha, beta );
/* Strong (bS==4, intra) chroma deblocking of one 8-pixel edge:
 * unconditional weak 3-tap on p0/q0 wherever the edge qualifies. */
561 static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
564 for( d = 0; d < 8; d++ )
566 const int p1 = pix[-2*xstride];
567 const int p0 = pix[-1*xstride];
568 const int q0 = pix[ 0*xstride];
569 const int q1 = pix[ 1*xstride];
571 if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
573 pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
574 pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
/* Direction wrappers for the strong chroma filter. */
579 static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
581 deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
583 static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
585 deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
/* Filter one inter (bS<4) edge.  pix2 is the second chroma plane or
 * NULL for luma (the NULL guard before the second pf_inter call is
 * elided in this view). */
588 static inline void deblock_edge( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
590 const int index_a = i_qp + h->sh.i_alpha_c0_offset;
591 const int alpha = alpha_table(index_a);
592 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
/* qp too low for any filtering at these offsets */
595 if( !alpha || !beta )
/* per-segment clipping strengths; chroma adds 1, per the spec */
598 tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
599 tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
600 tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
601 tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
603 pf_inter( pix1, i_stride, alpha, beta, tc );
605 pf_inter( pix2, i_stride, alpha, beta, tc );
/* Filter one intra (bS==4) edge: no tc table, strong filter only.
 * pix2 is the second chroma plane or NULL for luma (guard elided). */
608 static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
610 const int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
611 const int beta = beta_table(i_qp + h->sh.i_beta_offset);
613 if( !alpha || !beta )
616 pf_intra( pix1, i_stride, alpha, beta );
618 pf_intra( pix2, i_stride, alpha, beta );
/* Deblock one row of macroblocks (both fields of a row pair under
 * MBAFF).  With CAVLC + 8x8 transform, the nnz flags are temporarily
 * rewritten to per-8x8 granularity around the filtering and restored
 * afterwards. */
621 void x264_frame_deblock_row( x264_t *h, int mb_y )
623 const int s8x8 = 2 * h->mb.i_mb_stride;
624 const int s4x4 = 4 * h->mb.i_mb_stride;
625 const int b_interlaced = h->sh.b_mbaff;
626 const int mvy_limit = 4 >> b_interlaced;
/* below this qp, alpha/beta are zero for any plane, so the whole mb
 * can be skipped */
627 const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
628 const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
630 int stridey = h->fdec->i_stride[0];
631 int stride2y = stridey << b_interlaced;
632 int strideuv = h->fdec->i_stride[1];
633 int stride2uv = strideuv << b_interlaced;
/* see munge_cavlc_nnz_row for why this is needed */
635 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
636 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, munge_cavlc_nnz_row );
/* MBAFF: visit each mb column for both fields (mb_y toggles) before
 * advancing mb_x; progressive: plain left-to-right scan */
638 for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
640 const int mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
641 const int mb_8x8 = 2 * s8x8 * mb_y + 2 * mb_x;
642 const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
643 const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
644 const int i_qp = h->mb.qp[mb_xy];
/* P_SKIP mbs only need their left/top boundary edges filtered */
645 int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
646 uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
647 uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
648 uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x;
/* MBAFF bottom field: pointer adjustment (body elided in this view) */
649 if( b_interlaced && (mb_y&1) )
656 x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
/* qp too low to deblock anything in this mb */
658 if( i_qp <= qp_thresh )
/* FILTER_DIR: filter the current edge of luma and both chroma planes;
 * i_dir==0 is a vertical edge (deblock_h_*), i_dir==1 a horizontal
 * edge (deblock_v_*) */
662 #define FILTER_DIR(intra, i_dir)\
664 i_qpn= h->mb.qp[mbn_xy];\
668 deblock_edge##intra( h, pixy + 4*i_edge, NULL,\
669 stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
670 h->loopf.deblock_h_luma##intra );\
674 int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
675 deblock_edge##intra( h, pixu + 2*i_edge, pixv + 2*i_edge,\
676 stride2uv, bS, i_qpc, 1,\
677 h->loopf.deblock_h_chroma##intra );\
682 /* horizontal edge */\
683 deblock_edge##intra( h, pixy + 4*i_edge*stride2y, NULL,\
684 stride2y, bS, (i_qp+i_qpn+1) >> 1, 0,\
685 h->loopf.deblock_v_luma##intra );\
689 int i_qpc = (h->chroma_qp_table[i_qp] + h->chroma_qp_table[i_qpn] + 1) >> 1;\
690 deblock_edge##intra( h, pixu + 2*i_edge*stride2uv, pixv + 2*i_edge*stride2uv,\
691 stride2uv, bS, i_qpc, 1,\
692 h->loopf.deblock_v_chroma##intra );\
697 #define DEBLOCK_STRENGTH(i_dir)\
699 /* *** Get bS for each 4px for the current edge *** */\
700 if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
701 *(uint32_t*)bS = 0x03030303;\
704 *(uint32_t*)bS = 0x00000000;\
705 for( i = 0; i < 4; i++ )\
707 int x = i_dir == 0 ? i_edge : i;\
708 int y = i_dir == 0 ? i : i_edge;\
709 int xn = i_dir == 0 ? (x - 1)&0x03 : x;\
710 int yn = i_dir == 0 ? y : (y - 1)&0x03;\
711 if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
712 h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
714 else if(!(i_edge&no_sub8x8))\
716 if((i&no_sub8x8) && bS[i-1] != 2)\
720 /* FIXME: A given frame may occupy more than one position in\
721 * the reference list. So we should compare the frame numbers,\
722 * not the indices in the ref list.\
723 * No harm yet, as we don't generate that case.*/\
724 int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
725 int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
726 int i4p= mb_4x4+x+y*s4x4;\
727 int i4q= mbn_4x4+xn+yn*s4x4;\
728 if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
729 abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
730 abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
731 (h->sh.i_type == SLICE_TYPE_B &&\
732 (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
733 abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
734 abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
744 /* i_dir == 0 -> vertical edge
745 * i_dir == 1 -> horizontal edge */
/* DEBLOCK_DIR: walk the (up to 4) edges of the mb in one direction,
 * stepping by 2 for 8x8 transform; edge 0 borders the neighbour mb */
746 #define DEBLOCK_DIR(i_dir)\
748 int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
749 int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
750 DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
752 i_edge+= b_8x8_transform;\
755 mbn_xy = i_dir == 0 ? mb_xy - 1 : mb_xy - h->mb.i_mb_stride;\
756 mbn_8x8 = i_dir == 0 ? mb_8x8 - 2 : mb_8x8 - 2 * s8x8;\
757 mbn_4x4 = i_dir == 0 ? mb_4x4 - 4 : mb_4x4 - 4 * s4x4;\
758 if( b_interlaced && i_dir == 1 )\
760 mbn_xy -= h->mb.i_mb_stride;\
761 mbn_8x8 -= 2 * s8x8;\
762 mbn_4x4 -= 4 * s4x4;\
764 else if( IS_INTRA( h->mb.type[mb_xy] ) || IS_INTRA( h->mb.type[mbn_xy]) )\
766 FILTER_DIR( _intra, i_dir );\
769 DEBLOCK_STRENGTH(i_dir);\
770 if( *(uint32_t*)bS )\
771 FILTER_DIR( , i_dir);\
773 i_edge += b_8x8_transform+1;\
778 for( ; i_edge < i_edge_end; i_edge+=b_8x8_transform+1 )\
780 DEBLOCK_STRENGTH(i_dir);\
781 if( *(uint32_t*)bS )\
782 FILTER_DIR( , i_dir);\
790 if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
791 munge_cavlc_nnz( h, mb_y, h->mb.nnz_backup, restore_cavlc_nnz_row )
794 void x264_frame_deblock( x264_t *h )
797 for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y += 1 + h->sh.b_mbaff )
798 x264_frame_deblock_row( h, mb_y );
/* Prototypes for the assembly deblocking kernels, defined in the
 * platform-specific sources (presumably inside a HAVE_MMX guard in
 * the full file — the preprocessor lines are elided in this view). */
802 void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
803 void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
804 void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
805 void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
807 void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
808 void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
809 void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
810 void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
/* v8 variants process only 8 pixels per call; see the wrappers below */
812 void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
813 void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
814 void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
815 void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* The mmxext vertical luma kernels handle 8 pixels per call, so a full
 * 16-wide edge takes two calls; tc0 advances by 2 because each half
 * covers two 4-pixel segments. */
817 static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
819 x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
820 x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
822 static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
824 x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
825 x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
/* AltiVec kernels (presumably guarded by HAVE_ALTIVEC in the full file) */
831 void x264_deblock_v_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
832 void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
/* Populate the deblocking function table: C reference versions first,
 * then CPU-specific overrides.  NOTE(review): the HAVE_MMX / ARCH_X86
 * / HAVE_ALTIVEC preprocessor guards are elided in this view. */
835 void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
837 pf->deblock_v_luma = deblock_v_luma_c;
838 pf->deblock_h_luma = deblock_h_luma_c;
839 pf->deblock_v_chroma = deblock_v_chroma_c;
840 pf->deblock_h_chroma = deblock_h_chroma_c;
841 pf->deblock_v_luma_intra = deblock_v_luma_intra_c;
842 pf->deblock_h_luma_intra = deblock_h_luma_intra_c;
843 pf->deblock_v_chroma_intra = deblock_v_chroma_intra_c;
844 pf->deblock_h_chroma_intra = deblock_h_chroma_intra_c;
847 if( cpu&X264_CPU_MMXEXT )
849 pf->deblock_v_chroma = x264_deblock_v_chroma_mmxext;
850 pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
851 pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
852 pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
854 pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
855 pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
856 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
857 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
/* SSE2 luma kernels are skipped when the stack may be misaligned */
859 if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
861 pf->deblock_v_luma = x264_deblock_v_luma_sse2;
862 pf->deblock_h_luma = x264_deblock_h_luma_sse2;
863 pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
864 pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
870 if( cpu&X264_CPU_ALTIVEC )
872 pf->deblock_v_luma = x264_deblock_v_luma_altivec;
873 pf->deblock_h_luma = x264_deblock_h_luma_altivec;
/* Publish encoding progress: record how many lines of this frame are
 * complete and wake every thread blocked in x264_frame_cond_wait.
 * The store happens under the mutex so waiters cannot miss it. */
880 void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
882 x264_pthread_mutex_lock( &frame->mutex );
883 frame->i_lines_completed = i_lines_completed;
884 x264_pthread_cond_broadcast( &frame->cv );
885 x264_pthread_mutex_unlock( &frame->mutex );
/* Block until at least i_lines_completed lines of the frame have been
 * completed; the while-loop re-check guards against spurious wakeups. */
888 void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
890 x264_pthread_mutex_lock( &frame->mutex );
891 while( frame->i_lines_completed < i_lines_completed )
892 x264_pthread_cond_wait( &frame->cv, &frame->mutex );
893 x264_pthread_mutex_unlock( &frame->mutex );
/* Append frame at the end of a NULL-terminated frame list (the store
 * into the free slot is elided in this view). */
898 void x264_frame_push( x264_frame_t **list, x264_frame_t *frame )
901 while( list[i] ) i++;
/* Remove and return the last frame of a NULL-terminated list (the
 * extraction and NULLing of the slot are elided in this view). */
905 x264_frame_t *x264_frame_pop( x264_frame_t **list )
910 while( list[i+1] ) i++;
/* Insert frame at the head of the list, shifting the existing entries
 * up by one (the shift loop is elided in this view). */
916 void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame )
919 while( list[i] ) i++;
/* Remove and return the first frame, sliding the rest down one slot. */
925 x264_frame_t *x264_frame_shift( x264_frame_t **list )
927 x264_frame_t *frame = list[0];
929 for( i = 0; list[i]; i++ )
/* Drop one reference to the frame; when the count reaches zero the
 * frame is recycled onto the unused list.  The final assert verifies
 * the fixed-size unused array has not overflowed (its last slot must
 * remain the NULL terminator). */
935 void x264_frame_push_unused( x264_t *h, x264_frame_t *frame )
937 assert( frame->i_reference_count > 0 );
938 frame->i_reference_count--;
939 if( frame->i_reference_count == 0 )
940 x264_frame_push( h->frames.unused, frame );
941 assert( h->frames.unused[ sizeof(h->frames.unused) / sizeof(*h->frames.unused) - 1 ] == NULL );
/* Fetch a recycled frame from the unused list, or allocate a fresh
 * one, and hand it out with refcount 1.
 * NOTE(review): x264_frame_new returns NULL on allocation failure and
 * frame is dereferenced below without a check — confirm callers. */
944 x264_frame_t *x264_frame_pop_unused( x264_t *h )
947 if( h->frames.unused[0] )
948 frame = x264_frame_pop( h->frames.unused );
950 frame = x264_frame_new( h );
951 assert( frame->i_reference_count == 0 );
952 frame->i_reference_count = 1;
953 frame->b_intra_calculated = 0;
957 void x264_frame_sort( x264_frame_t **list, int b_dts )
962 for( i = 0; list[i+1]; i++ )
964 int dtype = list[i]->i_type - list[i+1]->i_type;
965 int dtime = list[i]->i_frame - list[i+1]->i_frame;
966 int swap = b_dts ? dtype > 0 || ( dtype == 0 && dtime > 0 )
970 XCHG( x264_frame_t*, list[i], list[i+1] );