- "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
- "addl $32, %%esi\n"
- "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
- /* addps %%xmm5, %%xmm0 */
- "mulps %%xmm4, %%xmm6\n"
- "addl $-32, %%edi\n"
- /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */
- "movups %%xmm0, (%%eax)\n"
- /* addps %%xmm5, %%xmm6 */
- "addl $32, %%edx\n"
- "addl $32, %%eax\n"
- /* addl $32, %%ebx */
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
+ "movaps 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 -- aligned load; NOTE(review): movaps faults unless 16(%edx) is 16-byte aligned, confirm the buffer alignment */
+ "addl $32, %%esi\n" /* esi += 32 bytes */
+ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
+ "mulps %%xmm4, %%xmm6\n" /* xmm6 *= xmm4, lane by lane (the w4..w7 values loaded above) */
+ "addl $-32, %%edi\n" /* edi -= 32 bytes (moves opposite to esi) */
+ "movaps %%xmm0, (%%eax)\n" /* store xmm0 to the 16 bytes at (%eax); NOTE(review): aligned store, confirm %eax is 16-byte aligned */
+ "addl $32, %%edx\n" /* edx += 32 bytes */
+ "addl $32, %%eax\n" /* eax += 32 bytes */
+ "movaps %%xmm6, -16(%%eax)\n" /* eax was already bumped by 32, so this stores xmm6 at old eax + 16 */
+ "decl %%ebx\n" /* decrement ebx and set flags; presumably the loop counter (branch not visible here). NOTE(review): ebx is callee-saved and the PIC register on i386, confirm it is covered by the asm operands/clobbers */