+#if defined (USE_VNNI) // VNNI build: all four accumulators start at zero and the loop below begins at chunk 0
+ __m512i sum0 = _mm512_setzero_si512(); // NOTE(review): presumably the loop body accumulates into sum0..sum3 - body not visible here, confirm
+ __m512i sum1 = _mm512_setzero_si512();
+ __m512i sum2 = _mm512_setzero_si512();
+ __m512i sum3 = _mm512_setzero_si512();
+ const IndexType kStart = 0; // first chunk index visited by the loop below
+#else // non-VNNI build: each accumulator is initialised from chunk 0 of the input and chunk 0 of its own row
+ __m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]); // two-argument helper defined elsewhere; its exact arithmetic is not visible here
+ __m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
+ __m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
+ __m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
+ const IndexType kStart = 1; // chunk 0 was already consumed by the initialisers above, so the loop starts at chunk 1
+#endif // USE_VNNI
+
+ for (IndexType j = kStart; j < kNumChunks512; ++j)