int i_refc = h->mb.cache.ref[i_list][i8 - 8 + i_width ];
int16_t *mv_c = h->mb.cache.mv[i_list][i8 - 8 + i_width];
- int i_count;
+ int i_count = 0;
- if( (idx&0x03) == 3 || ( i_width == 2 && (idx&0x3) == 2 )|| i_refc == -2 )
+ if( (idx&3) >= 2 + (i_width&1) || i_refc == -2 )
{
i_refc = h->mb.cache.ref[i_list][i8 - 8 - 1];
mv_c = h->mb.cache.mv[i_list][i8 - 8 - 1];
if( h->mb.i_partition == D_16x8 )
{
- if( idx == 0 && i_refb == i_ref )
+ if( idx == 0 )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_b;
- return;
+ if( i_refb == i_ref )
+ {
+ *(uint32_t*)mvp = *(uint32_t*)mv_b;
+ return;
+ }
}
- else if( idx != 0 && i_refa == i_ref )
+ else
{
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
- return;
+ if( i_refa == i_ref )
+ {
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ return;
+ }
}
}
else if( h->mb.i_partition == D_8x16 )
{
- if( idx == 0 && i_refa == i_ref )
+ if( idx == 0 )
{
- *(uint32_t*)mvp = *(uint32_t*)mv_a;
- return;
+ if( i_refa == i_ref )
+ {
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
+ return;
+ }
}
- else if( idx != 0 && i_refc == i_ref )
+ else
{
- *(uint32_t*)mvp = *(uint32_t*)mv_c;
- return;
+ if( i_refc == i_ref )
+ {
+ *(uint32_t*)mvp = *(uint32_t*)mv_c;
+ return;
+ }
}
}
- i_count = 0;
if( i_refa == i_ref ) i_count++;
if( i_refb == i_ref ) i_count++;
if( i_refc == i_ref ) i_count++;
if( i_count > 1 )
+ {
+median:
x264_median_mv( mvp, mv_a, mv_b, mv_c );
+ }
else if( i_count == 1 )
{
if( i_refa == i_ref )
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
*(uint32_t*)mvp = *(uint32_t*)mv_a;
else
- x264_median_mv( mvp, mv_a, mv_b, mv_c );
+ goto median;
}
void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
int16_t *mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
- int i_count;
+ int i_count = 0;
if( i_refc == -2 )
{
mv_c = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
}
- i_count = 0;
if( i_refa == i_ref ) i_count++;
if( i_refb == i_ref ) i_count++;
if( i_refc == i_ref ) i_count++;
if( i_count > 1 )
+ {
+median:
x264_median_mv( mvp, mv_a, mv_b, mv_c );
+ }
else if( i_count == 1 )
{
if( i_refa == i_ref )
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
*(uint32_t*)mvp = *(uint32_t*)mv_a;
else
- x264_median_mv( mvp, mv_a, mv_b, mv_c );
+ goto median;
}
{
int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
- int i8, i4;
+ int i8;
const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+ if( h->param.i_threads > 1 && (l0y > h->mb.mv_max_spel[1] || l0y-mv_col[1] > h->mb.mv_max_spel[1]) )
+ return 0;
x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
}
- if( h->param.i_threads > 1 )
- {
- for( i4=0; i4<16; i4+=4 )
- {
- if( h->mb.cache.mv[0][x264_scan8[i4]][1] > h->mb.mv_max_spel[1]
- || h->mb.cache.mv[1][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] )
- {
-#if 0
- fprintf(stderr, "direct_temporal: (%d,%d) (%d,%d) > %d \n",
- h->mb.cache.mv[0][x264_scan8[i4]][0],
- h->mb.cache.mv[0][x264_scan8[i4]][1],
- h->mb.cache.mv[1][x264_scan8[i4]][0],
- h->mb.cache.mv[1][x264_scan8[i4]][1],
- h->mb.mv_max_spel[1]);
-#endif
- return 0;
- }
- }
- }
-
return 1;
}
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
int ref[2];
- DECLARE_ALIGNED_8( int16_t mv[2][2] );
+ ALIGNED_8( int16_t mv[2][2] );
int i_list;
int i8;
const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
const int x8 = i8%2;
const int y8 = i8/2;
const int o8 = x8 + y8 * h->mb.i_b8_stride;
- if( l1ref0[o8] == 0 || ( l1ref0[o8] < 0 && l1ref1[o8] == 0 ) )
+ const int o4 = 3*(x8 + y8 * h->mb.i_b4_stride);
+ if( l1ref0[o8] == 0 )
+ {
+ if( abs( l1mv0[o4][0] ) <= 1 && abs( l1mv0[o4][1] ) <= 1 )
+ {
+ if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
+ if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
+ }
+ }
+ else if( l1ref0[o8] < 0 && l1ref1[o8] == 0 )
{
- const int16_t (*l1mv)[2] = (l1ref0[o8] == 0) ? l1mv0 : l1mv1;
- const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride];
- if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
+ if( abs( l1mv1[o4][0] ) <= 1 && abs( l1mv1[o4][1] ) <= 1 )
{
- if( ref[0] == 0 )
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
- if( ref[1] == 0 )
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
+ if( ref[0] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
+ if( ref[1] == 0 ) x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
}
}
}
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
int i_mode = x264_size2pixel[height][width];
int i_stride0 = 16, i_stride1 = 16;
- DECLARE_ALIGNED_16( uint8_t tmp0[16*16] );
- DECLARE_ALIGNED_16( uint8_t tmp1[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, tmp0,[16*16] );
+ ALIGNED_ARRAY_16( uint8_t, tmp1,[16*16] );
uint8_t *src0, *src1;
src0 = h->mc.get_ref( tmp0, &i_stride0, h->mb.pic.p_fref[0][i_ref0], h->mb.pic.i_stride[0],
for( i=0; i<=h->param.b_interlaced; i++ )
for( j=0; j<3; j++ )
{
- CHECKED_MALLOC( h->mb.intra_border_backup[i][j], h->fdec->i_stride[j] );
+ /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
+ CHECKED_MALLOCZERO( h->mb.intra_border_backup[i][j], h->fdec->i_stride[j] );
h->mb.intra_border_backup[i][j] += 8;
}
int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
- CHECKED_MALLOC( h->scratch_buffer, X264_MAX3( buf_hpel, buf_ssim, buf_tesa ) );
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
+ CHECKED_MALLOC( h->scratch_buffer, X264_MAX4( buf_hpel, buf_ssim, buf_tesa, buf_mbtree ) );
return 0;
fail: return -1;
{
h->mb.i_mb_type_top =
i_top_type= h->mb.type[i_top_xy];
+ h->mb.cache.i_cbp_top = h->mb.cbp[i_top_xy];
h->mb.i_neighbour |= MB_TOP;
else
{
h->mb.i_mb_type_top = -1;
+ h->mb.cache.i_cbp_top = -1;
/* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] =
i_left_xy = i_mb_xy - 1;
h->mb.i_mb_type_left =
i_left_type = h->mb.type[i_left_xy];
+ h->mb.cache.i_cbp_left = h->mb.cbp[h->mb.i_mb_xy - 1];
h->mb.i_neighbour |= MB_LEFT;
else
{
h->mb.i_mb_type_left = -1;
+ h->mb.cache.i_cbp_left = -1;
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =