+
+# avx2=yes: define USE_AVX2; when comp is gcc, clang or mingw also pass -mavx2.
+ifeq (yes,$(avx2))
+  CXXFLAGS += -DUSE_AVX2
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mavx2
+  endif
+endif
+
+# avx512=yes: define USE_AVX512; when comp is gcc, clang or mingw also pass
+# -mavx512f and -mavx512bw.
+ifeq (yes,$(avx512))
+  CXXFLAGS += -DUSE_AVX512
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mavx512f -mavx512bw
+  endif
+endif
+
+# vnni256=yes: define USE_VNNI (the same macro the vnni512 switch defines).
+# When comp is gcc, clang or mingw, this block passes the full set of AVX-512
+# machine flags itself, together with -mprefer-vector-width=256.
+ifeq (yes,$(vnni256))
+  CXXFLAGS += -DUSE_VNNI
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
+  endif
+endif
+
+# vnni512=yes: define USE_VNNI (the same macro the vnni256 switch defines).
+# When comp is gcc, clang or mingw, only the vnni/dq/vl flags are added here;
+# -mavx512f and -mavx512bw are not part of this block.
+ifeq (yes,$(vnni512))
+  CXXFLAGS += -DUSE_VNNI
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
+  endif
+endif
+
+# sse41=yes: define USE_SSE41; when comp is gcc, clang or mingw also pass
+# -msse4.1.
+ifeq (yes,$(sse41))
+  CXXFLAGS += -DUSE_SSE41
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -msse4.1
+  endif
+endif
+
+# ssse3=yes: define USE_SSSE3; when comp is gcc, clang or mingw also pass
+# -mssse3.
+ifeq (yes,$(ssse3))
+  CXXFLAGS += -DUSE_SSSE3
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mssse3
+  endif
+endif
+
+# sse2=yes: define USE_SSE2; when comp is gcc, clang or mingw also pass -msse2.
+ifeq (yes,$(sse2))
+  CXXFLAGS += -DUSE_SSE2
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -msse2
+  endif
+endif
+
+# mmx=yes: define USE_MMX; when comp is gcc, clang or mingw also pass -mmmx.
+ifeq (yes,$(mmx))
+  CXXFLAGS += -DUSE_MMX
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mmmx
+  endif
+endif
+
+# neon=yes: define USE_NEON. -mfpu=neon is added only when KERNEL is Linux,
+# COMP is not ndk and arch is not armv8.
+# NOTE(review): this block tests upper-case COMP, whereas the other feature
+# switches test lower-case comp; kept as written - confirm it is intentional.
+ifeq (yes,$(neon))
+  CXXFLAGS += -DUSE_NEON
+  ifeq (Linux,$(KERNEL))
+    ifneq (ndk,$(COMP))
+      ifneq (armv8,$(arch))
+        CXXFLAGS += -mfpu=neon
+      endif
+    endif
+  endif
+endif
+
+### 3.7 pext
+### pext=yes: define USE_PEXT; when comp is gcc, clang or mingw also pass
+### -mbmi2.
+ifeq (yes,$(pext))
+  CXXFLAGS += -DUSE_PEXT
+  ifeq ($(filter $(comp),gcc clang mingw),$(comp))
+    CXXFLAGS += -mbmi2
+  endif
+endif
+
+### 3.8 Link Time Optimization
+### Compile-time and link-time options are mixed here: the LTO link phase
+### needs the optimization flags as well, so every branch appends the
+### accumulated CXXFLAGS to LDFLAGS. Applies only when optimize=yes and
+### debug=no.
+ifeq (yes,$(optimize))
+ifeq (no,$(debug))
+
+  # clang: -flto=thin; when KERNEL contains MINGW or MSYS also pass
+  # -fuse-ld=lld. CXXFLAGS must be complete before it is appended to LDFLAGS.
+  ifeq (clang,$(comp))
+    CXXFLAGS += -flto=thin
+    ifneq ($(findstring MINGW,$(KERNEL))$(findstring MSYS,$(KERNEL)),)
+      CXXFLAGS += -fuse-ld=lld
+    endif
+    LDFLAGS += $(CXXFLAGS)
+
+  # gcc: GCC and clang parallelize LTO differently, and clang pretends to be
+  # GCC on some systems, so a non-empty gccisclang selects the clang-style
+  # flags.
+  else ifeq (gcc,$(comp))
+    ifneq ($(gccisclang),)
+      CXXFLAGS += -flto=thin
+      LDFLAGS += $(CXXFLAGS)
+    else
+      CXXFLAGS += -flto
+      LDFLAGS += $(CXXFLAGS) -flto=jobserver
+      # When KERNEL contains MINGW or MSYS the link step also gets -save-temps.
+      ifneq ($(findstring MINGW,$(KERNEL))$(findstring MSYS,$(KERNEL)),)
+        LDFLAGS += -save-temps
+      endif
+    endif
+
+  # mingw: LTO together with static linking on Windows requires a recent gcc
+  # (gcc 10.1 in msys2 or TDM-GCC 9.2 are known to work, older ones might
+  # not). It is therefore enabled by default only for a cross build from
+  # Linux, and not for arch=i386.
+  else ifeq (mingw,$(comp))
+    ifeq (Linux,$(KERNEL))
+      ifneq (i386,$(arch))
+        CXXFLAGS += -flto
+        LDFLAGS += $(CXXFLAGS) -flto=jobserver
+      endif
+    endif
+  endif
+
+endif
+endif
+
+### 3.9 Android 5 can only run position independent executables. Note that this
+### breaks Android 4.0 and earlier.
+ifeq ($(OS), Android)
+ CXXFLAGS += -fPIE
+ LDFLAGS += -fPIE -pie