From 9bbfc30284469a70374a75fecfa322c4740dc2b7 Mon Sep 17 00:00:00 2001
From: Fiona Glaser
Date: Tue, 18 Oct 2011 14:14:03 -0700
Subject: [PATCH] Split prefetch_fenc between colorspaces

Add 4:2:2 version.
---
 common/arm/mc-c.c   |  3 ++-
 common/mc.c         |  3 ++-
 common/mc.h         |  4 ++++
 common/x86/mc-a.asm | 25 +++++++++++++++++++++----
 common/x86/mc-c.c   |  6 ++++--
 encoder/encoder.c   |  3 +++
 6 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c437dd30..c1fc05c0 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -210,7 +210,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
         return;
 
 #if !HIGH_BIT_DEPTH
-    pf->prefetch_fenc = x264_prefetch_fenc_arm;
+    pf->prefetch_fenc_420 = x264_prefetch_fenc_arm;
+    pf->prefetch_fenc_422 = x264_prefetch_fenc_arm; /* FIXME */
     pf->prefetch_ref = x264_prefetch_ref_arm;
 #endif // !HIGH_BIT_DEPTH
 
diff --git a/common/mc.c b/common/mc.c
index c2b77f58..6f772afa 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -506,7 +506,8 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
 
     pf->hpel_filter = hpel_filter;
 
-    pf->prefetch_fenc = prefetch_fenc_null;
+    pf->prefetch_fenc_420 = prefetch_fenc_null;
+    pf->prefetch_fenc_422 = prefetch_fenc_null;
     pf->prefetch_ref = prefetch_ref_null;
     pf->memcpy_aligned = memcpy;
     pf->memzero_aligned = memzero_aligned;
diff --git a/common/mc.h b/common/mc.h
index 09dda557..40fb5918 100644
--- a/common/mc.h
+++ b/common/mc.h
@@ -103,6 +103,10 @@ typedef struct
     /* prefetch the next few macroblocks of fenc or fdec */
     void (*prefetch_fenc)( pixel *pix_y, int stride_y,
                            pixel *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc_420)( pixel *pix_y, int stride_y,
+                               pixel *pix_uv, int stride_uv, int mb_x );
+    void (*prefetch_fenc_422)( pixel *pix_y, int stride_y,
+                               pixel *pix_uv, int stride_uv, int mb_x );
     /* prefetch the next few macroblocks of a hpel reference frame */
     void (*prefetch_ref)( pixel *pix, int stride, int parity );
 
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 7ece5b87..55349f16 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1302,9 +1302,10 @@ MC_COPY 16
 ; void prefetch_fenc( pixel *pix_y, int stride_y,
 ; pixel *pix_uv, int stride_uv, int mb_x )
 ;-----------------------------------------------------------------------------
-INIT_MMX
+
+%macro PREFETCH_FENC 1
 %ifdef ARCH_X86_64
-cglobal prefetch_fenc_mmx2, 5,5
+cglobal prefetch_fenc_%1, 5,5
     FIX_STRIDES r1d, r3d
     and r4d, 3
     mov eax, r4d
@@ -1320,10 +1321,15 @@ cglobal prefetch_fenc_mmx2, 5,5
     lea r2, [r2+rax*2+64*SIZEOF_PIXEL]
     prefetcht0 [r2]
     prefetcht0 [r2+r3]
+%ifidn %1, 422
+    lea r2, [r2+r3*2]
+    prefetcht0 [r2]
+    prefetcht0 [r2+r3]
+%endif
     RET
 
 %else
-cglobal prefetch_fenc_mmx2, 0,3
+cglobal prefetch_fenc_%1, 0,3
     mov r2, r4m
     mov r1, r1m
     mov r0, r0m
@@ -1346,13 +1352,24 @@ cglobal prefetch_fenc_mmx2, 0,3
     lea r0, [r0+r2*2+64*SIZEOF_PIXEL]
     prefetcht0 [r0]
     prefetcht0 [r0+r1]
+%ifidn %1, 422
+    lea r0, [r0+r1*2]
+    prefetcht0 [r0]
+    prefetcht0 [r0+r1]
+%endif
     ret
 %endif ; ARCH_X86_64
+%endmacro
+
+INIT_MMX mmx2
+PREFETCH_FENC 420
+PREFETCH_FENC 422
 
 ;-----------------------------------------------------------------------------
 ; void prefetch_ref( pixel *pix, int stride, int parity )
 ;-----------------------------------------------------------------------------
-cglobal prefetch_ref_mmx2, 3,3
+INIT_MMX mmx2
+cglobal prefetch_ref, 3,3
     FIX_STRIDES r1d
     dec r2d
     and r2d, r1d
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 1bef238f..b64f5f5f 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -86,7 +86,8 @@ void x264_mc_copy_w8_sse2( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_mmx( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_sse2( pixel *, int, pixel *, int, int );
 void x264_mc_copy_w16_aligned_sse2( pixel *, int, pixel *, int, int );
-void x264_prefetch_fenc_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_420_mmx2( pixel *, int, pixel *, int, int );
+void x264_prefetch_fenc_422_mmx2( pixel *, int, pixel *, int, int );
 void x264_prefetch_ref_mmx2( pixel *, int, int );
 void x264_plane_copy_core_mmx2( pixel *, int, pixel *, int, int w, int h);
 void x264_plane_copy_c( pixel *, int, pixel *, int, int w, int h );
@@ -517,7 +518,8 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     if( !(cpu&X264_CPU_MMX2) )
         return;
 
-    pf->prefetch_fenc = x264_prefetch_fenc_mmx2;
+    pf->prefetch_fenc_420 = x264_prefetch_fenc_420_mmx2;
+    pf->prefetch_fenc_422 = x264_prefetch_fenc_422_mmx2;
     pf->prefetch_ref = x264_prefetch_ref_mmx2;
 
     pf->plane_copy = x264_plane_copy_mmx2;
diff --git a/encoder/encoder.c b/encoder/encoder.c
index b8ab8e5d..e253463e 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -980,6 +980,7 @@ static void chroma_dsp_init( x264_t *h )
     {
         case CHROMA_420:
             memcpy( h->predict_chroma, h->predict_8x8c, sizeof(h->predict_chroma) );
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_420;
             h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_420;
             h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_420_intra;
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_420_mbaff;
@@ -990,6 +991,7 @@ static void chroma_dsp_init( x264_t *h )
             break;
         case CHROMA_422:
             memcpy( h->predict_chroma, h->predict_8x16c, sizeof(h->predict_chroma) );
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_422;
             h->loopf.deblock_chroma[0] = h->loopf.deblock_h_chroma_422;
             h->loopf.deblock_chroma_intra[0] = h->loopf.deblock_h_chroma_422_intra;
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_chroma_422_mbaff;
@@ -999,6 +1001,7 @@ static void chroma_dsp_init( x264_t *h )
             h->quantf.coeff_level_run[DCT_CHROMA_DC] = h->quantf.coeff_level_run8;
             break;
         case CHROMA_444:
+            h->mc.prefetch_fenc = h->mc.prefetch_fenc_422; /* FIXME: doesn't cover V plane */
             h->loopf.deblock_chroma_mbaff = h->loopf.deblock_luma_mbaff;
             h->loopf.deblock_chroma_intra_mbaff = h->loopf.deblock_luma_intra_mbaff;
             break;
-- 
2.39.5