# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
-# vnni = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
+# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions (256-bit vector width)
+# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions (512-bit vector width)
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
#
# Note that Makefile is space sensitive, so when adding new architectures
sse41 = no
avx2 = no
avx512 = no
-vnni = no
+vnni256 = no
+vnni512 = no
neon = no
ARCH = x86-64-modern
STRIP = strip
avx512 = yes
endif
-ifeq ($(findstring -vnni,$(ARCH)),-vnni)
+ifeq ($(findstring -vnni256,$(ARCH)),-vnni256) # taken when ARCH contains the substring -vnni256
+ popcnt = yes
+ sse = yes
+ sse2 = yes
+ ssse3 = yes
+ sse41 = yes
+ avx2 = yes
+ pext = yes
+ vnni256 = yes
+endif # -vnni256 (this block does not set avx512)
+
+ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
popcnt = yes
sse = yes
sse2 = yes
avx2 = yes
pext = yes
avx512 = yes
- vnni = yes
+ vnni512 = yes
endif
ifeq ($(sse),yes)
endif
ifeq ($(ARCH),armv8)
- arch = armv8-a
+ arch = armv8
prefetch = yes
popcnt = yes
neon = yes
CXX=g++
CXXFLAGS += -pedantic -Wextra -Wshadow
- ifeq ($(arch),$(filter $(arch),armv7 armv8-a))
+ ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android)
CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits)
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
STRIP=arm-linux-androideabi-strip
endif
- ifeq ($(arch),armv8-a)
+ ifeq ($(arch),armv8)
comp=aarch64-linux-android21-clang
CXX=aarch64-linux-android21-clang++
STRIP=aarch64-linux-android-strip
### On mingw use Windows threads, otherwise POSIX
ifneq ($(comp),mingw)
+ CXXFLAGS += -DUSE_PTHREADS
# On Android Bionic's C library comes with its own pthread implementation bundled in
ifneq ($(OS),Android)
# Haiku has pthreads in its libroot, so only link it in on other platforms
### 3.6 popcnt
ifeq ($(popcnt),yes)
- ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8-a arm64))
+ ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT
else ifeq ($(comp),icc)
CXXFLAGS += -msse3 -DUSE_POPCNT
endif
endif
-ifeq ($(vnni),yes)
+ifeq ($(vnni256),yes) # VNNI build that asks the compiler to prefer 256-bit vectors
+ CXXFLAGS += -DUSE_VNNI
+ ifeq ($(comp),$(filter $(comp),gcc clang mingw)) # comp matches gcc, clang or mingw (also holds if comp is empty)
+ CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
+ endif # comp
+endif # vnni256
+
+ifeq ($(vnni512),yes)
CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
CXXFLAGS += -DUSE_NEON
ifeq ($(KERNEL),Linux)
ifneq ($(COMP),ndk)
+ ifneq ($(arch),armv8)
CXXFLAGS += -mfpu=neon
endif
endif
+ endif
endif
### 3.7 pext
@echo ""
@echo "Supported archs:"
@echo ""
- @echo "x86-64-vnni > x86 64-bit with vnni support"
+ @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide"
+ @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide"
@echo "x86-64-avx512 > x86 64-bit with avx512 support"
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support"
@echo "x86-64-avx2 > x86 64-bit with avx2 support"
@echo "sse41: '$(sse41)'"
@echo "avx2: '$(avx2)'"
@echo "avx512: '$(avx512)'"
- @echo "vnni: '$(vnni)'"
+ @echo "vnni256: '$(vnni256)'"
+ @echo "vnni512: '$(vnni512)'"
@echo "neon: '$(neon)'"
@echo ""
@echo "Flags:"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
- test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
+ test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
@test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
@test "$(sse41)" = "yes" || test "$(sse41)" = "no"
@test "$(avx2)" = "yes" || test "$(avx2)" = "no"
@test "$(avx512)" = "yes" || test "$(avx512)" = "no"
- @test "$(vnni)" = "yes" || test "$(vnni)" = "no"
+ @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no"
+ @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no"
@test "$(neon)" = "yes" || test "$(neon)" = "no"
@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
|| test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang"