#include "common.h"
-#define PADH 32
-#define PADV 32
-
/* Allocate a new frame and its pixel buffers.
 * NOTE(review): this is a unified-diff fragment ('-' = removed, '+' = added);
 * the hunks elide parts of the body, so braces do not balance in this view. */
x264_frame_t *x264_frame_new( x264_t *h )
{
x264_frame_t *frame = x264_malloc( sizeof(x264_frame_t) );
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
/* vertical padding is doubled when interlaced so each field gets PADV rows */
int i_padv = PADV << h->param.b_interlaced;
+ int luma_plane_size;
if( !frame ) return NULL;
if( h->param.b_interlaced )
i_lines = ( i_lines + 31 ) & -32;
/* new: round the luma stride up to a cacheline multiple so the cacheline-split
 * optimizations see all planes at the same alignment phase */
+ if( h->param.cpu&X264_CPU_CACHELINE_SPLIT )
+ {
+ int align = h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 64;
+ i_stride = (i_stride + align-1) & -align;
+ }
+
frame->i_plane = 3;
/* old code supported I420/I422 via per-plane divisors; the new code assumes
 * 4:2:0 only, halving stride/width/lines for the chroma planes (i > 0) */
for( i = 0; i < 3; i++ )
{
- int i_divh = 1;
- int i_divw = 1;
- if( i > 0 )
- {
- if( h->param.i_csp == X264_CSP_I420 )
- i_divh = i_divw = 2;
- else if( h->param.i_csp == X264_CSP_I422 )
- i_divw = 2;
- }
- frame->i_stride[i] = i_stride / i_divw;
- frame->i_width[i] = i_width / i_divw;
- frame->i_lines[i] = i_lines / i_divh;
- CHECKED_MALLOC( frame->buffer[i],
- frame->i_stride[i] * ( frame->i_lines[i] + 2*i_padv / i_divh ) );
-
- frame->plane[i] = ((uint8_t*)frame->buffer[i]) +
- frame->i_stride[i] * i_padv / i_divh + PADH / i_divw;
+ frame->i_stride[i] = i_stride >> !!i;
+ frame->i_width[i] = i_width >> !!i;
+ frame->i_lines[i] = i_lines >> !!i;
}
- frame->filtered[0] = frame->plane[0];
- for( i = 0; i < 3; i++ )
/* padded byte size of one full-resolution (luma-sized) plane */
+ luma_plane_size = (frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ));
/* chroma planes: each is 1/4 the luma size in 4:2:0 */
+ for( i = 1; i < 3; i++ )
{
- CHECKED_MALLOC( frame->buffer[4+i],
- frame->i_stride[0] * ( frame->i_lines[0] + 2*i_padv ) );
- frame->filtered[i+1] = ((uint8_t*)frame->buffer[4+i]) +
- frame->i_stride[0] * i_padv + PADH;
+ CHECKED_MALLOC( frame->buffer[i], luma_plane_size/4 );
+ frame->plane[i] = (uint8_t*)frame->buffer[i] + (frame->i_stride[i] * i_padv + PADH)/2;
}
+ /* all 4 luma planes allocated together, since the cacheline split code
+ * requires them to be in-phase wrt cacheline alignment. */
+ CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size);
+ for( i = 0; i < 4; i++ )
+ frame->filtered[i] = (uint8_t*)frame->buffer[0] + i*luma_plane_size + frame->i_stride[0] * i_padv + PADH;
+ frame->plane[0] = frame->filtered[0];
if( h->frames.b_have_lowres )
{
}
}
/* '>=' instead of '==': presumably so ME methods ranked above ESA also get
 * the integral-image buffer -- TODO confirm against X264_ME_* ordering */
- if( h->param.analyse.i_me_method == X264_ME_ESA )
+ if( h->param.analyse.i_me_method >= X264_ME_ESA )
{
/* buffer index moves 7 -> 3 to match the compacted buffer[] layout above */
- CHECKED_MALLOC( frame->buffer[7],
+ CHECKED_MALLOC( frame->buffer[3],
2 * frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) );
- frame->integral = (uint16_t*)frame->buffer[7] + frame->i_stride[0] * i_padv + PADH;
+ frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
}
frame->i_poc = -1;
/* Free a frame and all of its pixel/lowres buffers.
 * The loop bound drops 8 -> 4 because x264_frame_new now packs the four
 * filtered luma planes into buffer[0] and chroma into buffer[1..2],
 * with buffer[3] reserved for the integral image. */
void x264_frame_delete( x264_frame_t *frame )
{
int i, j;
- for( i = 0; i < 8; i++ )
+ for( i = 0; i < 4; i++ )
x264_free( frame->buffer[i] );
for( i = 0; i < 4; i++ )
x264_free( frame->buffer_lowres[i] );
x264_free( frame );
}
/* Copy a user-supplied picture into the frame's internal padded planes.
 * Interface change: now returns int (-1 on unsupported colorspace, 0 on
 * success) instead of void, and only accepts planar 4:2:0 input
 * (I420/YV12) -- the generic csp.convert path is removed. */
-void x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
+int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
{
int i_csp = src->img.i_csp & X264_CSP_MASK;
+ int i;
+ if( i_csp != X264_CSP_I420 && i_csp != X264_CSP_YV12 )
+ {
+ x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
+ return -1;
+ }
+
dst->i_type = src->i_type;
dst->i_qpplus1 = src->i_qpplus1;
dst->i_pts = src->i_pts;
- if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
- x264_log( h, X264_LOG_ERROR, "Arg invalid CSP\n" );
- else
- h->csp.convert[i_csp]( &h->mc, dst, &src->img, h->param.i_width, h->param.i_height );
+ for( i=0; i<3; i++ )
+ {
/* YV12 stores V before U: i^3 maps 1<->2 to swap the chroma plane order */
+ int s = (i_csp == X264_CSP_YV12 && i) ? i^3 : i;
+ uint8_t *plane = src->img.plane[s];
+ int stride = src->img.i_stride[s];
/* chroma planes (i > 0) are half width/height in 4:2:0 */
+ int width = h->param.i_width >> !!i;
+ int height = h->param.i_height >> !!i;
/* bottom-up source: start at the last row and walk with a negative stride */
+ if( src->img.i_csp & X264_CSP_VFLIP )
+ {
+ plane += (height-1)*stride;
+ stride = -stride;
+ }
+ h->mc.plane_copy( dst->plane[i], dst->i_stride[i], plane, stride, width, height );
+ }
+ return 0;
}
/* Pad the borders of the half-pel filtered planes.
 * NOTE(review): this hunk is truncated -- the function continues past
 * this view. The change narrows the trusted filtered margin from 8 to 4
 * horizontal pixels (up to 3 of the outer ones may be wrong), so padding
 * now starts 4 pixels outside the frame instead of 8. */
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
- /* during filtering, 8 extra pixels were filtered on each edge.
+ /* during filtering, 8 extra pixels were filtered on each edge,
+ * but up to 3 of the horizontal ones may be wrong.
we want to expand border from the last filtered pixel */
int b_start = !mb_y;
int stride = frame->i_stride[0];
- int width = 16*h->sps->i_mb_width + 16;
+ int width = 16*h->sps->i_mb_width + 8;
int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
- int padh = PADH - 8;
+ int padh = PADH - 4;
int padv = PADV - 8;
int i;
/* planes 1..3 are the hpel-filtered luma planes (plane 0 is handled elsewhere) */
for( i = 1; i < 4; i++ )
{
// buffer: 8 luma, to match the hpel filter
- uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 8;
+ uint8_t *pix = frame->filtered[i] + (16*mb_y - (8 << h->sh.b_mbaff)) * stride - 4;
if( h->sh.b_mbaff )
{
plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end );
/* Assembly deblocking prototypes. The SSE2 luma deblockers (plus new
 * intra variants) are now declared unconditionally instead of x86-64
 * only; the 8-pixel-wide mmxext halves are 32-bit x86 only and are
 * wrapped below to cover a full 16-pixel luma edge. */
void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
-#ifdef ARCH_X86_64
void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
-#else
+void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
+void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta );
+#ifdef ARCH_X86
void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 );
+void x264_deblock_h_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
+void x264_deblock_v8_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta );
/* run the 8-wide mmx kernel twice for a full 16-pixel luma edge;
 * tc0 advances by 2 since each half consumes two tc0 entries */
void x264_deblock_v_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
{
x264_deblock_v8_luma_mmxext( pix, stride, alpha, beta, tc0 );
x264_deblock_v8_luma_mmxext( pix+8, stride, alpha, beta, tc0+2 );
}
/* same two-half trick for the intra (no tc0) variant */
+void x264_deblock_v_luma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
+{
+ x264_deblock_v8_luma_intra_mmxext( pix, stride, alpha, beta );
+ x264_deblock_v8_luma_intra_mmxext( pix+8, stride, alpha, beta );
+}
#endif
#endif
/* NOTE(review): fragment of the deblock function-pointer setup; the
 * enclosing function's header lies outside this view. New layout:
 * install mmxext luma deblockers as the 32-bit x86 baseline, then let
 * SSE2 (now available on both arches) override them, adding the intra
 * variants on both tiers. */
pf->deblock_h_chroma = x264_deblock_h_chroma_mmxext;
pf->deblock_v_chroma_intra = x264_deblock_v_chroma_intra_mmxext;
pf->deblock_h_chroma_intra = x264_deblock_h_chroma_intra_mmxext;
-
-#ifdef ARCH_X86_64
+#ifdef ARCH_X86
+ pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
+ pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
+ pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
+ pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
+#endif
if( cpu&X264_CPU_SSE2 )
{
pf->deblock_v_luma = x264_deblock_v_luma_sse2;
pf->deblock_h_luma = x264_deblock_h_luma_sse2;
+ pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_sse2;
+ pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_sse2;
}
-#else
- pf->deblock_v_luma = x264_deblock_v_luma_mmxext;
- pf->deblock_h_luma = x264_deblock_h_luma_mmxext;
-#endif
}
#endif
x264_pthread_mutex_unlock( &frame->mutex );
}
/* Thread-safe setter for the frame's estimated encoded size (bits);
 * serialized on the fenc mutex against the matching getter. */
+void x264_frame_size_estimated_set( x264_t *h, int bits )
+{
+ x264_pthread_mutex_lock( &h->fenc->mutex );
+ x264_ratecontrol_set_estimated_size(h, bits);
+ x264_pthread_mutex_unlock( &h->fenc->mutex );
+}
+
/* Thread-safe getter matching x264_frame_size_estimated_set(); returns
 * the estimated encoded size in bits under the fenc mutex. */
+int x264_frame_size_estimated_get( x264_t const *h)
+{
+ int size;
+ x264_pthread_mutex_lock( &h->fenc->mutex );
+ size = x264_ratecontrol_get_estimated_size(h);
+ x264_pthread_mutex_unlock( &h->fenc->mutex );
+ return size;
+}
+
#else
/* Single-threaded build: frame progress broadcast/wait are no-ops. */
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
{}
void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed )
{}
+
/* Single-threaded build: set the estimate directly, no locking needed. */
+void x264_frame_size_estimated_set( x264_t *h, int bits )
+{
+ x264_ratecontrol_set_estimated_size(h, bits);
+}
+
+/* Single-threaded build: read the estimated size directly, no locking.
+ * FIX: the added code called x264_ratecontrol_set_estimated_size(h) --
+ * the setter, with the wrong arity (it takes (h, bits), see the
+ * threaded setter above). The threaded getter reads via
+ * x264_ratecontrol_get_estimated_size(); do the same here. */
+int x264_frame_size_estimated_get( x264_t const *h)
+{
+ return x264_ratecontrol_get_estimated_size(h);
+}
#endif