15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
while (in < (const __m256i *)limit) {
// Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
while (in < (const __m256i *)limit) {
// Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).