+ /* Chroma interleave / deinterleave checks, run for heights 8 and 16.
+  * For every function whose mc_a pointer differs from mc_ref, the mc_c
+  * implementation writes pbuf3 and the mc_a implementation writes pbuf4,
+  * then both outputs are compared.
+  * All byte counts are scaled by sizeof(pixel) so that the whole destination
+  * region is cleared and compared even when pixel is wider than one byte. */
+ ok = 1; used_asm = 0;
+ for( int height = 8; height <= 16; height += 8 )
+ {
+     if( mc_a.store_interleave_chroma != mc_ref.store_interleave_chroma )
+     {
+         /* Destination stride is 64 elements per row. */
+         const size_t dst_bytes = 64*height*sizeof(pixel);
+         set_func_name( "store_interleave_chroma" );
+         used_asm = 1;
+         memset( pbuf3, 0, dst_bytes );
+         memset( pbuf4, 0, dst_bytes );
+         call_c( mc_c.store_interleave_chroma, pbuf3, (intptr_t)64, pbuf1, pbuf1+16, height );
+         call_a( mc_a.store_interleave_chroma, pbuf4, (intptr_t)64, pbuf1, pbuf1+16, height );
+         if( memcmp( pbuf3, pbuf4, dst_bytes ) )
+         {
+             ok = 0;
+             fprintf( stderr, "store_interleave_chroma FAILED: h=%d\n", height );
+             break;
+         }
+     }
+     if( mc_a.load_deinterleave_chroma_fenc != mc_ref.load_deinterleave_chroma_fenc )
+     {
+         const size_t dst_bytes = FENC_STRIDE*height*sizeof(pixel);
+         set_func_name( "load_deinterleave_chroma_fenc" );
+         used_asm = 1;
+         /* Clear both outputs so that any bytes the function leaves untouched
+          * cannot differ because of leftover buffer content. */
+         memset( pbuf3, 0, dst_bytes );
+         memset( pbuf4, 0, dst_bytes );
+         call_c( mc_c.load_deinterleave_chroma_fenc, pbuf3, pbuf1, (intptr_t)64, height );
+         call_a( mc_a.load_deinterleave_chroma_fenc, pbuf4, pbuf1, (intptr_t)64, height );
+         if( memcmp( pbuf3, pbuf4, dst_bytes ) )
+         {
+             ok = 0;
+             fprintf( stderr, "load_deinterleave_chroma_fenc FAILED: h=%d\n", height );
+             break;
+         }
+     }
+     if( mc_a.load_deinterleave_chroma_fdec != mc_ref.load_deinterleave_chroma_fdec )
+     {
+         const size_t dst_bytes = FDEC_STRIDE*height*sizeof(pixel);
+         set_func_name( "load_deinterleave_chroma_fdec" );
+         used_asm = 1;
+         /* Same reasoning as above: start from identical, zeroed outputs. */
+         memset( pbuf3, 0, dst_bytes );
+         memset( pbuf4, 0, dst_bytes );
+         call_c( mc_c.load_deinterleave_chroma_fdec, pbuf3, pbuf1, (intptr_t)64, height );
+         call_a( mc_a.load_deinterleave_chroma_fdec, pbuf4, pbuf1, (intptr_t)64, height );
+         if( memcmp( pbuf3, pbuf4, dst_bytes ) )
+         {
+             ok = 0;
+             fprintf( stderr, "load_deinterleave_chroma_fdec FAILED: h=%d\n", height );
+             break;
+         }
+     }
+ }
+ report( "store_interleave :" );
+
+ struct plane_spec {
+ int w, h, src_stride;
+ } plane_specs[] = { {2,2,2}, {8,6,8}, {20,31,24}, {32,8,40}, {256,10,272}, {504,7,505}, {528,6,528}, {256,10,-256}, {263,9,-264}, {1904,1,0} };
+ /* plane_copy: for every entry of plane_specs, copy with the mc_c version
+  * into pbuf3 and with the mc_a version into pbuf4, then compare the first
+  * `width` pixels of every destination row. */
+ ok = 1; used_asm = 0;
+ if( mc_a.plane_copy != mc_ref.plane_copy )
+ {
+     const int spec_count = sizeof(plane_specs)/sizeof(*plane_specs);
+     set_func_name( "plane_copy" );
+     used_asm = 1;
+     for( int spec = 0; spec < spec_count; spec++ )
+     {
+         const int width = plane_specs[spec].w;
+         const int height = plane_specs[spec].h;
+         const intptr_t src_stride = plane_specs[spec].src_stride;
+         const intptr_t dst_stride = (width + 127) & ~63;
+         assert( dst_stride * height <= 0x1000 );
+         /* With a negative stride, start far enough into pbuf1 that the
+          * rows addressed backwards stay at or after pbuf1. */
+         pixel *src_start = pbuf1 + X264_MAX(0, -src_stride) * (height-1);
+         memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+         memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+         call_c( mc_c.plane_copy, pbuf3, dst_stride, src_start, src_stride, width, height );
+         call_a( mc_a.plane_copy, pbuf4, dst_stride, src_start, src_stride, width, height );
+         for( int row = 0; row < height; row++ )
+         {
+             const intptr_t row_off = row*dst_stride;
+             if( memcmp( pbuf3+row_off, pbuf4+row_off, width*sizeof(pixel) ) )
+             {
+                 ok = 0;
+                 fprintf( stderr, "plane_copy FAILED: w=%d h=%d stride=%d\n", width, height, (int)src_stride );
+                 break;
+             }
+         }
+     }
+ }
+
+ /* plane_copy_swap: the width passed to the function is half the table
+  * width (rounded up); 2*width pixels of every destination row are
+  * compared between the mc_c output (pbuf3) and the mc_a output (pbuf4). */
+ if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap )
+ {
+     const int spec_count = sizeof(plane_specs)/sizeof(*plane_specs);
+     set_func_name( "plane_copy_swap" );
+     used_asm = 1;
+     for( int spec = 0; spec < spec_count; spec++ )
+     {
+         const int width = (plane_specs[spec].w + 1) >> 1;
+         const int height = plane_specs[spec].h;
+         const intptr_t src_stride = plane_specs[spec].src_stride;
+         const intptr_t dst_stride = (2*width + 127) & ~63;
+         assert( dst_stride * height <= 0x1000 );
+         /* With a negative stride, start far enough into pbuf1 that the
+          * rows addressed backwards stay at or after pbuf1. */
+         pixel *src_start = pbuf1 + X264_MAX(0, -src_stride) * (height-1);
+         memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+         memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+         call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src_start, src_stride, width, height );
+         call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src_start, src_stride, width, height );
+         for( int row = 0; row < height; row++ )
+         {
+             const intptr_t row_off = row*dst_stride;
+             if( memcmp( pbuf3+row_off, pbuf4+row_off, 2*width*sizeof(pixel) ) )
+             {
+                 ok = 0;
+                 fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", width, height, (int)src_stride );
+                 break;
+             }
+         }
+     }
+ }
+
+ /* plane_copy_interleave: two source planes (the second one 1024 pixels
+  * after the first, with a stride 16 larger) are combined into one
+  * destination; 2*width pixels of every destination row are compared
+  * between the mc_c output (pbuf3) and the mc_a output (pbuf4). */
+ if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
+ {
+     const int spec_count = sizeof(plane_specs)/sizeof(*plane_specs);
+     set_func_name( "plane_copy_interleave" );
+     used_asm = 1;
+     for( int spec = 0; spec < spec_count; spec++ )
+     {
+         const int width = (plane_specs[spec].w + 1) >> 1;
+         const int height = plane_specs[spec].h;
+         const intptr_t src_stride = (plane_specs[spec].src_stride + 1) >> 1;
+         const intptr_t dst_stride = (2*width + 127) & ~63;
+         assert( dst_stride * height <= 0x1000 );
+         /* With a negative stride, start far enough into pbuf1 that the
+          * rows addressed backwards stay at or after pbuf1. */
+         pixel *src_first = pbuf1 + X264_MAX(0, -src_stride) * (height-1);
+         pixel *src_second = src_first+1024;
+         memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+         memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+         call_c( mc_c.plane_copy_interleave, pbuf3, dst_stride, src_first, src_stride, src_second, src_stride+16, width, height );
+         call_a( mc_a.plane_copy_interleave, pbuf4, dst_stride, src_first, src_stride, src_second, src_stride+16, width, height );
+         for( int row = 0; row < height; row++ )
+         {
+             const intptr_t row_off = row*dst_stride;
+             if( memcmp( pbuf3+row_off, pbuf4+row_off, 2*width*sizeof(pixel) ) )
+             {
+                 ok = 0;
+                 fprintf( stderr, "plane_copy_interleave FAILED: w=%d h=%d stride=%d\n", width, height, (int)src_stride );
+                 break;
+             }
+         }
+     }
+ }
+
+ /* plane_copy_deinterleave: one source plane from pbuf1 is split into two
+  * destination planes, the second one placed offv pixels after the first.
+  * The mc_c version writes pbuf3, the mc_a version writes pbuf4, and every
+  * row of both destination planes is compared.
+  * Buffer offsets are in pixel units, so the memset/memcmp byte counts are
+  * scaled by sizeof(pixel); otherwise only part of each row would be
+  * checked when pixel is wider than one byte. */
+ if( mc_a.plane_copy_deinterleave != mc_ref.plane_copy_deinterleave )
+ {
+     set_func_name( "plane_copy_deinterleave" );
+     used_asm = 1;
+     for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+     {
+         int w = (plane_specs[i].w + 1) >> 1;
+         int h = plane_specs[i].h;
+         intptr_t dst_stride = w;
+         intptr_t src_stride = (2*w + 127) & ~63;
+         intptr_t offv = (dst_stride*h + 31) & ~15;
+         /* Both destination planes must fit into the 0x1000-pixel area
+          * that is cleared below. */
+         assert( offv + dst_stride*h <= 0x1000 );
+         memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+         memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+         call_c( mc_c.plane_copy_deinterleave, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf1, src_stride, w, h );
+         call_a( mc_a.plane_copy_deinterleave, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf1, src_stride, w, h );
+         for( int y = 0; y < h; y++ )
+             if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, w*sizeof(pixel) ) ||
+                 memcmp( pbuf3+y*dst_stride+offv, pbuf4+y*dst_stride+offv, w*sizeof(pixel) ) )
+             {
+                 ok = 0;
+                 fprintf( stderr, "plane_copy_deinterleave FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
+                 break;
+             }
+     }
+ }
+
+ /* plane_copy_deinterleave_rgb: packed input with pw (3 or 4) components
+  * per pixel is split into three destination planes placed offv pixels
+  * apart. The mc_c version writes pbuf3, the mc_a version writes pbuf4, and
+  * every row of all three planes is compared.
+  * src_stride comes straight from plane_specs and can be negative, so the
+  * source pointer is advanced so that the rows addressed backwards stay at
+  * or after pbuf1 instead of reaching before the start of the buffer.
+  * Byte counts are scaled by sizeof(pixel) because all offsets are in pixel
+  * units. */
+ if( mc_a.plane_copy_deinterleave_rgb != mc_ref.plane_copy_deinterleave_rgb )
+ {
+     set_func_name( "plane_copy_deinterleave_rgb" );
+     used_asm = 1;
+     for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
+     {
+         int w = (plane_specs[i].w + 2) >> 2;
+         int h = plane_specs[i].h;
+         intptr_t src_stride = plane_specs[i].src_stride;
+         intptr_t dst_stride = ALIGN( w, 16 );
+         intptr_t offv = dst_stride*h + 16;
+         /* All three destination planes must fit into the 0x1000-pixel
+          * area that is cleared below. */
+         assert( 2*offv + dst_stride*h <= 0x1000 );
+         /* First row pointer; offset for negative strides. */
+         pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
+
+         for( int pw = 3; pw <= 4; pw++ )
+         {
+             memset( pbuf3, 0, 0x1000*sizeof(pixel) );
+             memset( pbuf4, 0, 0x1000*sizeof(pixel) );
+             call_c( mc_c.plane_copy_deinterleave_rgb, pbuf3, dst_stride, pbuf3+offv, dst_stride, pbuf3+2*offv, dst_stride, src1, src_stride, pw, w, h );
+             call_a( mc_a.plane_copy_deinterleave_rgb, pbuf4, dst_stride, pbuf4+offv, dst_stride, pbuf4+2*offv, dst_stride, src1, src_stride, pw, w, h );
+             for( int y = 0; y < h; y++ )
+                 if( memcmp( pbuf3+y*dst_stride+0*offv, pbuf4+y*dst_stride+0*offv, w*sizeof(pixel) ) ||
+                     memcmp( pbuf3+y*dst_stride+1*offv, pbuf4+y*dst_stride+1*offv, w*sizeof(pixel) ) ||
+                     memcmp( pbuf3+y*dst_stride+2*offv, pbuf4+y*dst_stride+2*offv, w*sizeof(pixel) ) )
+                 {
+                     ok = 0;
+                     fprintf( stderr, "plane_copy_deinterleave_rgb FAILED: w=%d h=%d stride=%d pw=%d\n", w, h, (int)src_stride, pw );
+                     break;
+                 }
+         }
+     }
+ }
+ report( "plane_copy :" );
+
+ /* plane_copy_deinterleave_v210: the source is buf1 viewed as 32-bit words;
+  * two destination planes are written offv elements apart. The mc_c output
+  * (pbuf3) and the mc_a output (pbuf4) are compared row by row, with
+  * width*sizeof(uint16_t) bytes per row and plane. This check reports
+  * under its own "v210" heading. */
+ if( mc_a.plane_copy_deinterleave_v210 != mc_ref.plane_copy_deinterleave_v210 )
+ {
+     const int spec_count = sizeof(plane_specs)/sizeof(*plane_specs);
+     set_func_name( "plane_copy_deinterleave_v210" );
+     ok = 1; used_asm = 1;
+     for( int spec = 0; spec < spec_count; spec++ )
+     {
+         const int width = (plane_specs[spec].w + 1) >> 1;
+         const int height = plane_specs[spec].h;
+         const intptr_t dst_stride = ALIGN( width, 16 );
+         /* Source stride in 32-bit words: 128 bytes per started group of
+          * 48 output elements. */
+         const intptr_t src_stride = (width + 47) / 48 * 128 / sizeof(uint32_t);
+         const intptr_t offv = dst_stride*height + 32;
+         memset( pbuf3, 0, 0x1000 );
+         memset( pbuf4, 0, 0x1000 );
+         call_c( mc_c.plane_copy_deinterleave_v210, pbuf3, dst_stride, pbuf3+offv, dst_stride, (uint32_t *)buf1, src_stride, width, height );
+         call_a( mc_a.plane_copy_deinterleave_v210, pbuf4, dst_stride, pbuf4+offv, dst_stride, (uint32_t *)buf1, src_stride, width, height );
+         for( int row = 0; row < height; row++ )
+         {
+             const intptr_t row_off = row*dst_stride;
+             if( memcmp( pbuf3+row_off, pbuf4+row_off, width*sizeof(uint16_t) ) ||
+                 memcmp( pbuf3+row_off+offv, pbuf4+row_off+offv, width*sizeof(uint16_t) ) )
+             {
+                 ok = 0;
+                 fprintf( stderr, "plane_copy_deinterleave_v210 FAILED: w=%d h=%d stride=%d\n", width, height, (int)src_stride );
+                 break;
+             }
+         }
+     }
+     report( "v210 :" );
+ }
+