*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
+ * Fiona Glaser <fiona@x264.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
int i_stride, i_width, i_lines;
int i_padv = PADV << h->param.b_interlaced;
int luma_plane_size;
+ int chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
if( !frame ) return NULL;
frame->i_plane = 3;
for( i = 0; i < 3; i++ )
{
- frame->i_stride[i] = i_stride >> !!i;
+ frame->i_stride[i] = ALIGN( i_stride >> !!i, 16 );
frame->i_width[i] = i_width >> !!i;
frame->i_lines[i] = i_lines >> !!i;
}
luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
+ chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
for( i = 1; i < 3; i++ )
{
- CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
+ CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
}
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
- CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
- for( i = 0; i < 4; i++ )
- frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
- frame->plane[0] = frame->filtered[0];
+ if( h->param.analyse.i_subpel_refine )
+ {
+ CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
+ for( i = 0; i < 4; i++ )
+ frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ frame->plane[0] = frame->filtered[0];
+ }
+ else
+ {
+ CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
+ frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+ }
if( h->frames.b_have_lowres )
{
CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
for( i = 0; i < 4; i++ )
frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
+
+ for( j = 0; j <= !!h->param.i_bframe; j++ )
+ for( i = 0; i <= h->param.i_bframe; i++ )
+ {
+ CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+ memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
+ CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+ }
}
if( h->param.analyse.i_me_method >= X264_ME_ESA )
{
CHECKED_MALLOC( frame->buffer[3],
- 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
+ frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
}
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
if( h->param.rc.i_aq_mode )
+ {
CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ if( h->frames.b_have_lowres )
+ CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
+ }
x264_pthread_mutex_init( &frame->mutex, NULL );
x264_pthread_cond_init( &frame->cv, NULL );
for( i = 0; i < X264_BFRAME_MAX+2; i++ )
for( j = 0; j < X264_BFRAME_MAX+2; j++ )
x264_free( frame->i_row_satds[i][j] );
+ for( j = 0; j < 2; j++ )
+ for( i = 0; i <= X264_BFRAME_MAX; i++ )
+ {
+ x264_free( frame->lowres_mvs[j][i] );
+ x264_free( frame->lowres_mv_costs[j][i] );
+ }
+ x264_free( frame->f_qp_offset );
+ x264_free( frame->i_inv_qscale_factor );
+ x264_free( frame->i_intra_cost );
x264_free( frame->i_row_bits );
x264_free( frame->i_row_qp );
x264_free( frame->mb_type );
const int b_interlaced = h->sh.b_mbaff;
const int mvy_limit = 4 >> b_interlaced;
const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
+ const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
int mb_x;
int stridey = h->fdec->i_stride[0];
int stride2y = stridey << b_interlaced;
if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
bS[i] = 2;\
- else\
+ else if(!(i_edge&no_sub8x8))\
{\
- /* FIXME: A given frame may occupy more than one position in\
- * the reference list. So we should compare the frame numbers,\
- * not the indices in the ref list.\
- * No harm yet, as we don't generate that case.*/\
- int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
- int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
- int i4p= mb_4x4+x+y*s4x4;\
- int i4q= mbn_4x4+xn+yn*s4x4;\
- for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
- if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
- abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
- abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
+ if((i&no_sub8x8) && bS[i-1] != 2)\
+ bS[i] = bS[i-1];\
+ else\
+ {\
+ /* FIXME: A given frame may occupy more than one position in\
+ * the reference list. So we should compare the frame numbers,\
+ * not the indices in the ref list.\
+ * No harm yet, as we don't generate that case.*/\
+ int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
+ int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
+ int i4p= mb_4x4+x+y*s4x4;\
+ int i4q= mbn_4x4+xn+yn*s4x4;\
+ if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
+ abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
+ abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
+ (h->sh.i_type == SLICE_TYPE_B &&\
+ (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
+ abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
+ abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
{\
bS[i] = 1;\
- break;\
}\
+ }\
}\
}\
}\
#define DEBLOCK_DIR(i_dir)\
{\
int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
- int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
+ int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
if( i_edge )\
i_edge+= b_8x8_transform;\