const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));