+#define PLANE_COPY_SWAP(align, cpu) /* Defines x264_plane_copy_swap_<cpu>(): picks between x264_plane_copy_swap_core_<cpu>() and x264_plane_copy_swap_c() based on w. w counts pixel pairs (each row holds 2*w pixels, see the tail loop); h is the row count. */\
+static void x264_plane_copy_swap_##cpu( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h )\
+{\
+ int c_w = (align>>1) / sizeof(pixel) - 1; /* bitmask used below to test and round w; assumes (align>>1)/sizeof(pixel) is a power of two - TODO confirm align is in bytes */\
+ if( !(w&c_w) ) /* w is a multiple of c_w+1 (this includes w == 0): one core call covers every row */\
+ x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, w, h );\
+ else if( w > c_w ) /* w is not a multiple of c_w+1 but is wider than c_w pairs */\
+ {\
+ if( --h > 0 ) /* set one row aside for exact-width handling; true when other rows remain. NOTE(review): presumably this keeps the rounded-up core calls from touching memory past the plane - confirm */\
+ {\
+ if( i_src > 0 ) /* positive source stride: the row set aside is the last one */\
+ {\
+ x264_plane_copy_swap_core_##cpu( dst, i_dst, src, i_src, (w+c_w)&~c_w, h ); /* first h rows, width rounded up to a multiple of c_w+1 */\
+ dst += i_dst * h; /* step both pointers to the row that was set aside */\
+ src += i_src * h;\
+ }\
+ else /* zero or negative source stride: the row at dst/src is set aside; the h rows starting one stride further get the rounded-up width */\
+ x264_plane_copy_swap_core_##cpu( dst+i_dst, i_dst, src+i_src, i_src, (w+c_w)&~c_w, h );\
+ }\
+ x264_plane_copy_swap_core_##cpu( dst, 0, src, 0, w&~c_w, 1 ); /* set-aside row: core call on w rounded down to a multiple of c_w+1, single row, strides passed as 0 */\
+ for( int x = 2*(w&~c_w); x < 2*w; x += 2 ) /* remaining pairs of that row: x indexes pixels, two per pair */\
+ {\
+ dst[x] = src[x+1]; /* write each pair with its two pixels exchanged */\
+ dst[x+1] = src[x];\
+ }\
+ }\
+ else /* 0 < w <= c_w: the core routine is not used at all */\
+ x264_plane_copy_swap_c( dst, i_dst, src, i_src, w, h );\
+}
+
+PLANE_COPY_SWAP(16, ssse3) /* expands to x264_plane_copy_swap_ssse3(), with align = 16 */
+PLANE_COPY_SWAP(32, avx2) /* expands to x264_plane_copy_swap_avx2(), with align = 32 */
+