/*****************************************************************************
* macroblock.c: macroblock common functions
*****************************************************************************
- * Copyright (C) 2003-2013 x264 project
+ * Copyright (C) 2003-2016 x264 project
*
* Authors: Fiona Glaser <fiona@x264.com>
* Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
- * Henrik Gramner <hengar-6@student.ltu.se>
+ * Henrik Gramner <henrik@gramner.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
h->mb.b_interlaced = PARAM_INTERLACED;
- CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
- CHECKED_MALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
- memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+ PREALLOC_INIT
+
+ PREALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
+ PREALLOC( h->mb.mb_transform_size, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.slice_table, i_mb_count * sizeof(uint16_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
- CHECKED_MALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
+ PREALLOC( h->mb.intra4x4_pred_mode, i_mb_count * 8 * sizeof(int8_t) );
/* all coeffs */
- CHECKED_MALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
+ PREALLOC( h->mb.non_zero_count, i_mb_count * 48 * sizeof(uint8_t) );
if( h->param.b_cabac )
{
- CHECKED_MALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
- CHECKED_MALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.skipbp, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
+ PREALLOC( h->mb.mvd[0], i_mb_count * sizeof( **h->mb.mvd ) );
if( h->param.i_bframe )
- CHECKED_MALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
+ PREALLOC( h->mb.mvd[1], i_mb_count * sizeof( **h->mb.mvd ) );
}
for( int i = 0; i < 2; i++ )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
for( int j = !i; j < i_refs; j++ )
- {
- CHECKED_MALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
- M32( h->mb.mvr[i][j][0] ) = 0;
- h->mb.mvr[i][j]++;
- }
+ PREALLOC( h->mb.mvr[i][j], 2 * (i_mb_count + 1) * sizeof(int16_t) );
}
if( h->param.analyse.i_weighted_pred )
}
for( int i = 0; i < numweightbuf; i++ )
- CHECKED_MALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ PREALLOC( h->mb.p_weight_buf[i], luma_plane_size * sizeof(pixel) );
+ }
+
+ PREALLOC_END( h->mb.base );
+
+ memset( h->mb.slice_table, -1, i_mb_count * sizeof(uint16_t) );
+
+ for( int i = 0; i < 2; i++ )
+ {
+ int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
+ if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
+ i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
+
+ for( int j = !i; j < i_refs; j++ )
+ {
+ M32( h->mb.mvr[i][j][0] ) = 0;
+ h->mb.mvr[i][j]++;
+ }
}
return 0;
}
void x264_macroblock_cache_free( x264_t *h )
{
- for( int i = 0; i < 2; i++ )
- for( int j = !i; j < X264_REF_MAX*2; j++ )
- if( h->mb.mvr[i][j] )
- x264_free( h->mb.mvr[i][j]-1 );
- for( int i = 0; i < X264_REF_MAX; i++ )
- x264_free( h->mb.p_weight_buf[i] );
-
- if( h->param.b_cabac )
- {
- x264_free( h->mb.skipbp );
- x264_free( h->mb.chroma_pred_mode );
- x264_free( h->mb.mvd[0] );
- x264_free( h->mb.mvd[1] );
- }
- x264_free( h->mb.slice_table );
- x264_free( h->mb.intra4x4_pred_mode );
- x264_free( h->mb.non_zero_count );
- x264_free( h->mb.mb_transform_size );
- x264_free( h->mb.cbp );
- x264_free( h->mb.qp );
+ x264_free( h->mb.base );
}
int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
}
- int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int);
+ int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+7)&~7) * sizeof(int16_t);
scratch_size = X264_MAX( scratch_size, buf_mbtree );
if( scratch_size )
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
h->scratch_buffer = NULL;
int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
- CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
+ int buf_mbtree2 = buf_mbtree * 12; /* size of the internal propagate_list asm buffer */
+ scratch_size = X264_MAX( buf_lookahead_threads, buf_mbtree2 );
+ CHECKED_MALLOC( h->scratch_buffer2, scratch_size );
return 0;
fail:
{
// Looking at the bottom field so always take the bottom macroblock of the pair.
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
- h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+ h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]];
h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
}
}
- if( b_mbaff && mb_x == 0 && !(mb_y&1) && mb_y > 0 )
- h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_xy - h->mb.i_mb_stride];
+ if( b_mbaff && mb_x == 0 && !(mb_y&1) )
+ {
+ if( h->mb.i_mb_top_xy >= h->sh.i_first_mb )
+ h->mb.field_decoding_flag = h->mb.field[h->mb.i_mb_top_xy];
+ else
+ h->mb.field_decoding_flag = 0;
+ }
/* Check whether skip here would cause decoder to predict interlace mode incorrectly.
* FIXME: It might be better to change the interlace type rather than forcing a skip to be non-skip. */
if( b_mbaff )
{
if( MB_INTERLACED != h->mb.field_decoding_flag &&
- h->mb.i_mb_prev_xy >= 0 && IS_SKIP(h->mb.type[h->mb.i_mb_prev_xy]) )
+ (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
h->mb.b_allow_skip = 0;
- if( (mb_y&1) && IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
- {
- if( h->mb.i_neighbour & MB_LEFT )
- {
- if( h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else if( h->mb.i_neighbour & MB_TOP )
- {
- if( h->mb.field[h->mb.i_mb_top_xy] != MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- else // Frame mb pair is predicted
- {
- if( MB_INTERLACED )
- h->mb.b_allow_skip = 0;
- }
- }
}
if( h->param.b_cabac )
uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength;
if( IS_INTRA( h->mb.i_type ) )
{
- memset( bs[0][1], 3, 3*4*sizeof(uint8_t) );
- memset( bs[1][1], 3, 3*4*sizeof(uint8_t) );
+ M32( bs[0][1] ) = 0x03030303;
+ M64( bs[0][2] ) = 0x0303030303030303ULL;
+ M32( bs[1][1] ) = 0x03030303;
+ M64( bs[1][2] ) = 0x0303030303030303ULL;
return;
}
M32( bs[0][0] ) = 0x02020202;
M32( bs[0][2] ) = 0x02020202;
M32( bs[0][4] ) = 0x02020202;
- memset( bs[1][0], 2, 5*4*sizeof(uint8_t) ); /* [1][1] and [1][3] has to be set for 4:2:2 */
+ M64( bs[1][0] ) = 0x0202020202020202ULL; /* [1][1] and [1][3] have to be set for 4:2:2 */
+ M64( bs[1][2] ) = 0x0202020202020202ULL;
+ M32( bs[1][4] ) = 0x02020202;
return;
}
}