*
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
* Loren Merritt <lorenm@u.washington.edu>
+ * Fiona Glaser <fiona@x264.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
int i_stride, i_width, i_lines;
int i_padv = PADV << h->param.b_interlaced;
int luma_plane_size;
+ int chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
if( !frame ) return NULL;
frame->i_plane = 3;
for( i = 0; i < 3; i++ )
{
- frame->i_stride[i] = i_stride >> !!i;
+ frame->i_stride[i] = ALIGN( i_stride >> !!i, 16 );
frame->i_width[i] = i_width >> !!i;
frame->i_lines[i] = i_lines >> !!i;
}
luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
+ chroma_plane_size = (frame->i_stride[1] * ( frame->i_lines[1] + 2*i_padv ));
for( i = 1; i < 3; i++ )
{
- CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
+ CHECKED_MALLOC( frame->buffer[i], chroma_plane_size );
frame->plane[i] = frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
}
/* all 4 luma planes allocated together, since the cacheline split code
* requires them to be in-phase wrt cacheline alignment. */
- CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
- for( i = 0; i < 4; i++ )
- frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
- frame->plane[0] = frame->filtered[0];
+ if( h->param.analyse.i_subpel_refine )
+ {
+ CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
+ for( i = 0; i < 4; i++ )
+ frame->filtered[i] = frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ frame->plane[0] = frame->filtered[0];
+ }
+ else
+ {
+ CHECKED_MALLOC( frame->buffer[0], luma_plane_size);
+ frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
+ }
if( h->frames.b_have_lowres )
{
CHECKED_MALLOC( frame->buffer_lowres[0], 4 * luma_plane_size );
for( i = 0; i < 4; i++ )
frame->lowres[i] = frame->buffer_lowres[0] + (frame->i_stride_lowres * i_padv + PADH) + i * luma_plane_size;
+
+ for( j = 0; j <= !!h->param.i_bframe; j++ )
+ for( i = 0; i <= h->param.i_bframe; i++ )
+ {
+ CHECKED_MALLOC( frame->lowres_mvs[j][i], 2*h->mb.i_mb_count*sizeof(int16_t) );
+ memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
+ CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
+ }
}
if( h->param.analyse.i_me_method >= X264_ME_ESA )
{
CHECKED_MALLOC( frame->buffer[3],
- 2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
+ frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
}
CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
+ CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
if( h->param.i_bframe )
{
CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
for( j = 0; j < h->param.i_bframe + 2; j++ )
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
+ if( h->param.rc.i_aq_mode )
+ {
+ CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
+ if( h->frames.b_have_lowres )
+ CHECKED_MALLOC( frame->i_inv_qscale_factor, h->mb.i_mb_count * sizeof(uint16_t) );
+ }
+
x264_pthread_mutex_init( &frame->mutex, NULL );
x264_pthread_cond_init( &frame->cv, NULL );
for( i = 0; i < X264_BFRAME_MAX+2; i++ )
for( j = 0; j < X264_BFRAME_MAX+2; j++ )
x264_free( frame->i_row_satds[i][j] );
+ for( j = 0; j < 2; j++ )
+ for( i = 0; i <= X264_BFRAME_MAX; i++ )
+ {
+ x264_free( frame->lowres_mvs[j][i] );
+ x264_free( frame->lowres_mv_costs[j][i] );
+ }
+ x264_free( frame->f_qp_offset );
+ x264_free( frame->i_inv_qscale_factor );
+ x264_free( frame->i_intra_cost );
x264_free( frame->i_row_bits );
x264_free( frame->i_row_qp );
x264_free( frame->mb_type );
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
    /* during filtering, 8 extra pixels were filtered on each edge,
     * but up to 3 of the horizontal ones may be wrong.
     * we want to expand border from the last filtered pixel */
int b_start = !mb_y;
int stride = frame->i_stride[0];
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
* entropy coding, but per 64 coeffs for the purpose of deblocking */
-void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
+static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
{
uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
#define tc0_table(x) i_tc0_table[(x)+12]
/* From ffmpeg */
-static inline int clip_uint8( int a )
-{
- if (a&(~255))
- return (-a)>>31;
- else
- return a;
-}
-
/* Normal (non-intra) luma deblocking of one 16-sample edge.
 * pix points at the first q-side sample; xstride steps across the edge,
 * ystride steps along it.  tc0 holds one clipping value per 4-sample
 * group; a negative tc0[i] disables filtering for that group.
 * NOTE(review): x264_clip3/x264_clip_uint8 come from common.h — assumed
 * to clamp to [lo,hi] and [0,255] respectively; confirm there. */
static inline void deblock_luma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int i, d;
    for( i = 0; i < 4; i++ )
    {
        if( tc0[i] < 0 )
        {
            /* group disabled: skip its 4 samples along the edge */
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ )
        {
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            const int q2 = pix[ 2*xstride];

            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
                int tc = tc0[i];
                int delta;
                if( abs( p2 - p0 ) < beta )
                {
                    /* p1' — filtering an extra sample widens the clip range */
                    pix[-2*xstride] = p1 + x264_clip3( (( p2 + ((p0 + q0 + 1) >> 1)) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( abs( q2 - q0 ) < beta )
                {
                    /* q1' */
                    pix[ 1*xstride] = q1 + x264_clip3( (( q2 + ((p0 + q0 + 1) >> 1)) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical luma deblock: samples across the edge are one row (stride)
 * apart, and we walk along the edge one pixel at a time. */
static void deblock_v_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_luma_c( pix, stride, 1, alpha, beta, tc0 );
}
static void deblock_h_luma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
/* Normal (non-intra) chroma deblocking of one edge.
 * Chroma filters only p0/q0 and covers 2 samples per tc0 group.
 * tc0[i] <= 0 disables that group.
 * NOTE(review): x264_clip3/x264_clip_uint8 come from common.h. */
static inline void deblock_chroma_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
{
    int i, d;
    for( i = 0; i < 4; i++ )
    {
        const int tc = tc0[i];
        if( tc <= 0 )
        {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ )
        {
            const int p1 = pix[-2*xstride];
            const int p0 = pix[-1*xstride];
            const int q0 = pix[ 0*xstride];
            const int q1 = pix[ 1*xstride];
            if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
            {
                int delta = x264_clip3( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1*xstride] = x264_clip_uint8( p0 + delta ); /* p0' */
                pix[ 0*xstride] = x264_clip_uint8( q0 - delta ); /* q0' */
            }
            pix += ystride;
        }
    }
}
/* Vertical chroma deblock: cross-edge step is one row (stride). */
static void deblock_v_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_chroma_c( pix, stride, 1, alpha, beta, tc0 );
}
/* Horizontal chroma deblock: cross-edge step is one pixel, walk by rows. */
static void deblock_h_chroma_c( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    deblock_chroma_c( pix, 1, stride, alpha, beta, tc0 );
}
/* Intra (strong) luma deblocking of one 16-sample edge.
 * Uses only the alpha/beta sample-difference thresholds (no tc0 clip);
 * when the edge gradient is small enough, up to 3 samples on each side
 * are smoothed, otherwise only p0/q0.
 * (Reconstructed: the q0 load and the per-sample pointer advance were
 * missing from the visible span — forced by the reads/loop below.) */
static inline void deblock_luma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int d;
    for( d = 0; d < 16; d++ )
    {
        const int p2 = pix[-3*xstride];
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        const int q2 = pix[ 2*xstride];
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
            if( abs( p0 - q0 ) < ((alpha >> 2) + 2) )
            {
                if( abs( p2 - p0 ) < beta ) /* p0', p1', p2' */
                {
                    const int p3 = pix[-4*xstride];
                    pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                }
                else /* p0' */
                    pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                if( abs( q2 - q0 ) < beta ) /* q0', q1', q2' */
                {
                    const int q3 = pix[3*xstride];
                    pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                }
                else /* q0' */
                    pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
            else /* p0', q0' */
            {
                pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
            }
        }
        pix += ystride;
    }
}
/* Vertical intra luma deblock: cross-edge step is one row (stride). */
static void deblock_v_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_luma_intra_c( pix, stride, 1, alpha, beta );
}
/* Horizontal intra luma deblock: cross-edge step is one pixel. */
static void deblock_h_luma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_luma_intra_c( pix, 1, stride, alpha, beta );
}
/* Intra (strong) chroma deblocking of one 8-sample edge.
 * Chroma intra filtering only ever rewrites p0 and q0. */
static inline void deblock_chroma_intra_c( uint8_t *pix, int xstride, int ystride, int alpha, int beta )
{
    int d;
    for( d = 0; d < 8; d++ )
    {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0*xstride];
        const int q1 = pix[ 1*xstride];
        if( abs( p0 - q0 ) < alpha && abs( p1 - p0 ) < beta && abs( q1 - q0 ) < beta )
        {
            pix[-1*xstride] = (2*p1 + p0 + q1 + 2) >> 2; /* p0' */
            pix[ 0*xstride] = (2*q1 + q0 + p1 + 2) >> 2; /* q0' */
        }
        pix += ystride;
    }
}
/* Vertical intra chroma deblock: cross-edge step is one row (stride). */
static void deblock_v_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_chroma_intra_c( pix, stride, 1, alpha, beta );
}
/* Horizontal intra chroma deblock: cross-edge step is one pixel. */
static void deblock_h_chroma_intra_c( uint8_t *pix, int stride, int alpha, int beta )
{
    deblock_chroma_intra_c( pix, 1, stride, alpha, beta );
}
tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
pf_inter( pix1, i_stride, alpha, beta, tc );
- if(b_chroma)
+ if( b_chroma )
pf_inter( pix2, i_stride, alpha, beta, tc );
}
return;
pf_intra( pix1, i_stride, alpha, beta );
- if(b_chroma)
+ if( b_chroma )
pf_intra( pix2, i_stride, alpha, beta );
}
const int b_interlaced = h->sh.b_mbaff;
const int mvy_limit = 4 >> b_interlaced;
const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
+ const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
int mb_x;
int stridey = h->fdec->i_stride[0];
int stride2y = stridey << b_interlaced;
if( h->mb.non_zero_count[mb_xy][x+y*4] != 0 ||\
h->mb.non_zero_count[mbn_xy][xn+yn*4] != 0 )\
bS[i] = 2;\
- else\
+ else if(!(i_edge&no_sub8x8))\
{\
- /* FIXME: A given frame may occupy more than one position in\
- * the reference list. So we should compare the frame numbers,\
- * not the indices in the ref list.\
- * No harm yet, as we don't generate that case.*/\
- int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
- int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
- int i4p= mb_4x4+x+y*s4x4;\
- int i4q= mbn_4x4+xn+yn*s4x4;\
- for( l = 0; l < 1 + (h->sh.i_type == SLICE_TYPE_B); l++ )\
- if( h->mb.ref[l][i8p] != h->mb.ref[l][i8q] ||\
- abs( h->mb.mv[l][i4p][0] - h->mb.mv[l][i4q][0] ) >= 4 ||\
- abs( h->mb.mv[l][i4p][1] - h->mb.mv[l][i4q][1] ) >= mvy_limit )\
+ if((i&no_sub8x8) && bS[i-1] != 2)\
+ bS[i] = bS[i-1];\
+ else\
+ {\
+ /* FIXME: A given frame may occupy more than one position in\
+ * the reference list. So we should compare the frame numbers,\
+ * not the indices in the ref list.\
+ * No harm yet, as we don't generate that case.*/\
+ int i8p= mb_8x8+(x>>1)+(y>>1)*s8x8;\
+ int i8q= mbn_8x8+(xn>>1)+(yn>>1)*s8x8;\
+ int i4p= mb_4x4+x+y*s4x4;\
+ int i4q= mbn_4x4+xn+yn*s4x4;\
+ if((h->mb.ref[0][i8p] != h->mb.ref[0][i8q] ||\
+ abs( h->mb.mv[0][i4p][0] - h->mb.mv[0][i4q][0] ) >= 4 ||\
+ abs( h->mb.mv[0][i4p][1] - h->mb.mv[0][i4q][1] ) >= mvy_limit ) ||\
+ (h->sh.i_type == SLICE_TYPE_B &&\
+ (h->mb.ref[1][i8p] != h->mb.ref[1][i8q] ||\
+ abs( h->mb.mv[1][i4p][0] - h->mb.mv[1][i4q][0] ) >= 4 ||\
+ abs( h->mb.mv[1][i4p][1] - h->mb.mv[1][i4q][1] ) >= mvy_limit )))\
{\
bS[i] = 1;\
- break;\
}\
+ }\
}\
}\
}\
#define DEBLOCK_DIR(i_dir)\
{\
int i_edge = (i_dir ? (mb_y <= b_interlaced) : (mb_x == 0));\
- int i_qpn, i, l, mbn_xy, mbn_8x8, mbn_4x4;\
+ int i_qpn, i, mbn_xy, mbn_8x8, mbn_4x4;\
DECLARE_ALIGNED_4( uint8_t bS[4] ); /* filtering strength */\
if( i_edge )\
i_edge+= b_8x8_transform;\
void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* 16-wide luma edge via two calls to the 8-wide MMX kernel; tc0
 * advances by 2 because each call consumes two 4-sample groups.
 * static per the patch: only reachable through the function-pointer
 * table set up in this file. */
static void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
    x264_deblock_v8_luma_mmxext( pix,   stride, alpha, beta, tc0   );
    x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
/* Intra variant: 16-wide edge as two 8-wide MMX kernel calls.
 * static per the patch: only reachable through the function-pointer
 * table set up in this file. */
static void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
{
    x264_deblock_v8_luma_intra_mmxext( pix,   stride, alpha, beta );
    x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
}
frame = x264_frame_new( h );
assert( frame->i_reference_count == 0 );
frame->i_reference_count = 1;
+ frame->b_intra_calculated = 0;
return frame;
}