Eliminates an extra copy when encoding Android camera preview images.
Checkasm test by Janne Grunau.
ARM assembly with improvements from Janne Grunau.
pop {r4-r7, pc}
endfunc
+function x264_plane_copy_swap_neon // plane copy that swaps the two bytes of every byte pair; C prototype: (dst, i_dst, src, i_src, w, h)
+ push {r4-r5, lr} // save r4, r5 and the return address (12 bytes on the stack)
+ ldrd r4, r5, [sp, #12] // load the two stack arguments located just above the saved registers; assuming the standard ARM calling convention (r0 = dst, r1 = i_dst, r2 = src, r3 = i_src) these are r4 = w, r5 = h
+ add lr, r4, #15
+ bic lr, lr, #15 // lr = w rounded up to a multiple of 16; rows are processed in whole 32-byte chunks, so up to that rounded width is read and written
+ sub r1, r1, lr, lsl #1 // r1 -= 2*lr: what is left of the r0 stride after one row has been stored
+ sub r3, r3, lr, lsl #1 // r3 -= 2*lr: what is left of the r2 stride after one row has been loaded
+1:
+ vld1.8 {q0, q1}, [r2]! // load 32 bytes from [r2] and post-increment r2
+ subs lr, lr, #16 // 32 bytes cover 16 counter units; sets the flags tested by the bgt below
+ vrev16.8 q0, q0 // swap the two bytes inside every 16-bit element
+ vrev16.8 q1, q1
+ vst1.8 {q0, q1}, [r0]! // store 32 bytes to [r0] and post-increment r0
+ bgt 1b // keep going while lr > 0
+
+ subs r5, r5, #1 // one row finished
+ add r0, r0, r1 // move r0 to the start of its next row
+ add r2, r2, r3 // move r2 to the start of its next row
+ mov lr, r4 // reload the counter with the unrounded w; the inner loop still runs ceil(w/16) times
+ bgt 1b // flags still come from "subs r5" (add/mov do not update them): loop while rows remain
+
+ pop {r4-r5, pc} // restore r4, r5 and return
+endfunc
+
function x264_store_interleave_chroma_neon
push {lr}
ldr lr, [sp, #4]
void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
+void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
+ pixel *src, intptr_t i_src, int w, int h );
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
+ pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
[X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
+ [X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },
switch( external_csp & X264_CSP_MASK )
{
case X264_CSP_NV12:
+ case X264_CSP_NV21:
case X264_CSP_I420:
case X264_CSP_YV12:
return X264_CSP_NV12;
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
}
+ else if( i_csp == X264_CSP_NV21 )
+ {
+ get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
+ h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
+ stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
+ }
else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
{
int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
}
}
+/* Copy a plane made of w interleaved pixel pairs per row over h rows,
+ * exchanging the two pixels of every pair on the way (e.g. VU -> UV).
+ * i_dst and i_src are the row strides of dst and src, counted in pixels. */
+void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst,
+                             pixel *src, intptr_t i_src, int w, int h )
+{
+    for( int row = 0; row < h; row++ )
+    {
+        /* A row holds 2*w pixels; each iteration handles one pair. */
+        for( int pair = 0; pair < w; pair++ )
+        {
+            dst[2*pair]   = src[2*pair+1];
+            dst[2*pair+1] = src[2*pair];
+        }
+        dst += i_dst;
+        src += i_src;
+    }
+}
+
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h )
pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
pf->plane_copy = x264_plane_copy_c;
+ pf->plane_copy_swap = x264_plane_copy_swap_c;
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
+ void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
/* may write up to 15 pixels off the end of each plane */
#endif
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
- x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
+ x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
return -1;
}
csp_mask == X264_CSP_YV16 ||
csp_mask == X264_CSP_YV24 ||
csp_mask == X264_CSP_NV12 ||
+ csp_mask == X264_CSP_NV21 ||
csp_mask == X264_CSP_NV16 ||
csp_mask == X264_CSP_BGR ||
csp_mask == X264_CSP_RGB ||
static int csp_num_interleaved( int csp, int plane )
{
int csp_mask = csp & X264_CSP_MASK;
- return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
+ return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB ? 3 :
csp_mask == X264_CSP_BGRA ? 4 :
1;
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
/* the next csp has no equivalent 16bit depth in swscale */
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
+ case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
/* the next csp is not supported by swscale at all */
case X264_CSP_NV16:
default: return AV_PIX_FMT_NONE;
[X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
[X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
+ [X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
}
}
+ if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap ) /* only run when the implementation under test differs from the reference one */
+ {
+ set_func_name( "plane_copy_swap" );
+ used_asm = 1;
+ for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ ) /* one run per plane_specs entry */
+ {
+ int w = (plane_specs[i].w + 1) >> 1; /* half the spec width, rounded up: w counts pixel pairs */
+ int h = plane_specs[i].h;
+ intptr_t src_stride = plane_specs[i].src_stride;
+ intptr_t dst_stride = (2*w + 127) & ~63; /* 2*w + 64 rounded up to a multiple of 64, leaving at least 64 pixels of slack after each row */
+ assert( dst_stride * h <= 0x1000 ); /* output must fit in the 0x1000 pixels cleared below */
+ pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1); /* for a negative stride start at the last row, so stepping by src_stride never goes below pbuf1 */
+ memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+ memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+ call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h ); /* C result goes to pbuf3 */
+ call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h ); /* result of the implementation under test goes to pbuf4 */
+ for( int y = 0; y < h; y++ )
+ if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) ) /* compare only the 2*w pixels of each row; the slack after them is ignored */
+ {
+ ok = 0;
+ fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+ break; /* report a failing spec once, then move on to the next spec */
+ }
+ }
+ }
+
if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
{
set_func_name( "plane_copy_interleave" );
#include "x264_config.h"
-#define X264_BUILD 146
+#define X264_BUILD 147
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
-#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
-#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
-#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
-#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
-#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
-#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
-#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
-#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
-#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
-#define X264_CSP_MAX 0x000d /* end of list */
+#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */
+#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
+#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
+#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
+#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
+#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
+#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
+#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
+#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
+#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */