- if(i_count == 128)
- {
- int nonzero;
- asm(
- "movq (%1), %%mm0 \n"
- "por 8(%1), %%mm0 \n"
- "por 16(%1), %%mm0 \n"
- "por 24(%1), %%mm0 \n"
- "por 32(%1), %%mm0 \n"
- "por 40(%1), %%mm0 \n"
- "por 48(%1), %%mm0 \n"
- "por 56(%1), %%mm0 \n"
- "por 64(%1), %%mm0 \n"
- "por 72(%1), %%mm0 \n"
- "por 80(%1), %%mm0 \n"
- "por 88(%1), %%mm0 \n"
- "por 96(%1), %%mm0 \n"
- "por 104(%1), %%mm0 \n"
- "por 112(%1), %%mm0 \n"
- "por 120(%1), %%mm0 \n"
- "packsswb %%mm0, %%mm0 \n"
- "movd %%mm0, %0 \n"
- :"=r"(nonzero)
- :"r"(v)
- );
- return !!nonzero;
- }
- else return array_non_zero_int_c( v, i_count );
+ uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min ); /* Purpose of this run: for each 4-byte entry of mvc, compute (v + 2) >> 2 per signed 16-bit lane, clamp to [mv_min, mv_max], store to dst. mv_min holds the lower x/y bounds packed into one 32-bit word; lane order is decided by pack16to32_mask (not visible here) - presumably x low, y high; confirm. */
+ uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max ); /* upper x/y bounds, packed the same way */
+ static const uint64_t pw_2 = 0x0002000200020002ULL; /* four 16-bit words of 2: the bias added before the >>2 below */
+ intptr_t i = i_mvc; /* remaining entry count; pointer-sized because it is used as the index register in (%5,%0,4)/(%6,%0,4). NOTE(review): i == 0 is not guarded - "test" falls through to label 1 and the loop runs with offset -8; callers presumably guarantee i_mvc > 0 - confirm. */
+ asm(
+ "movd %2, %%mm5 \n" /* mm5 low dword = mv_min */
+ "movd %3, %%mm6 \n" /* mm6 low dword = mv_max */
+ "movq %4, %%mm7 \n" /* mm7 = pw_2 (bias of 2 in every word) */
+ "punpckldq %%mm5, %%mm5 \n" /* duplicate the low dword so both halves of mm5 hold mv_min */
+ "punpckldq %%mm6, %%mm6 \n" /* likewise, both halves of mm6 hold mv_max */
+ "test $1, %0 \n" /* is the count odd? */
+ "jz 1f \n" /* even: go straight to the two-entries-per-iteration loop */
+ "movd -4(%6,%0,4), %%mm0 \n" /* odd: load the single last 4-byte entry, at byte offset (i-1)*4 from mvc */
+ "paddw %%mm7, %%mm0 \n" /* add 2 to each 16-bit lane */
+ "psraw $2, %%mm0 \n" /* arithmetic shift right by 2 per lane */
+ "pmaxsw %%mm5, %%mm0 \n" /* clamp from below to mv_min (signed words) */
+ "pminsw %%mm6, %%mm0 \n" /* clamp from above to mv_max (signed words) */
+ "movd %%mm0, -4(%5,%0,4) \n" /* store the 4-byte result at the same offset in dst */
+ "dec %0 \n" /* one entry consumed */
+ "jz 2f \n" /* that was the only entry: done */
+ "1: \n" /* main loop: walks from the end toward the start, 8 bytes (two entries) per pass */
+ "movq -8(%6,%0,4), %%mm0 \n" /* load entries i-2 and i-1 from mvc */
+ "paddw %%mm7, %%mm0 \n" /* add 2 to each 16-bit lane */
+ "psraw $2, %%mm0 \n" /* arithmetic shift right by 2 per lane */
+ "pmaxsw %%mm5, %%mm0 \n" /* clamp from below to mv_min */
+ "pminsw %%mm6, %%mm0 \n" /* clamp from above to mv_max */
+ "movq %%mm0, -8(%5,%0,4) \n" /* store both results at the same offset in dst */
+ "sub $2, %0 \n" /* two entries consumed */
+ "jnz 1b \n" /* repeat until the count reaches zero */
+ "2: \n" /* exit label */
+ :"+r"(i), "=m"(M64( dst )) /* outputs: %0 = i (read and written), %1 = memory operand telling the compiler dst is written. NOTE(review): M64 is defined elsewhere - presumably a 64-bit view, in which case only the first 8 bytes are declared although the loop stores i_mvc*4 bytes; confirm. */
+ :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(dst), "r"(mvc), "m"(M64( mvc )) /* inputs: %2 = mv_min, %3 = mv_max, %4 = pw_2, %5 = dst base, %6 = mvc base, %7 = memory operand marking mvc as read (never referenced in the template) */
+ ); /* NOTE(review): no clobber list is given although mm0, mm5, mm6 and mm7 are overwritten, and no emms is issued in this statement - confirm the surrounding code accounts for both. */