It is enabled when selecting the x86-64-modern target and gives
another nice speed-up:
On a Core i5-2500 (3300 MHz, Sandy Bridge):
64 bit download version:
1597151 n/s
-flto :
1659664 n/s
-flto -msse3:
1732344 n/s
Patch suggested by Tom Vijlbrief.
Also unify the flto, popcount and msse3 optimizations under the "modern"
target. Note that this can break the "modern" build on old gcc versions
that do not support the -flto option: in that case update gcc ;-) or
fall back to the standard build.
No functional change.
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
# flag --- Comp switch --- Description
# ----------------------------------------------------------------------------
#
# flag --- Comp switch --- Description
# ----------------------------------------------------------------------------
#
-# debug = no/yes --- -DNDEBUG --- Enable/Disable debug mode
+# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode
# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations
# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations
-# arch = (name) --- (-arch) --- Target architecture
-# os = (name) --- --- Target operating system
-# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
-# bigendian = no/yes --- -DBIGENDIAN --- big/little-endian byte order
-# prefetch = no/yes --- -DUSE_PREFETCH --- Use prefetch x86 asm-instruction
-# bsfq = no/yes --- -DUSE_BSFQ --- Use bsfq x86_64 asm-instruction
-# --- (Works only with GCC and ICC 64-bit)
-# popcnt = no/yes --- -DUSE_POPCNT --- Use popcnt x86_64 asm-instruction
-# lto = no/yes --- -flto --- gcc Link Time Optimization
+# arch = (name) --- (-arch) --- Target architecture
+# os = (name) --- --- Target operating system
+# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
+# bigendian = yes/no --- -DBIGENDIAN --- big/little-endian byte order
+# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch x86 asm-instruction
+# bsfq = yes/no --- -DUSE_BSFQ --- Use bsfq x86_64 asm-instruction (only
+# with GCC and ICC 64-bit)
+# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt x86_64 asm-instruction
#
# Note that Makefile is space sensitive, so when adding new architectures
# or modifying existing flags, you have to make sure there are no extra spaces
#
# Note that Makefile is space sensitive, so when adding new architectures
# or modifying existing flags, you have to make sure there are no extra spaces
prefetch = yes
bsfq = yes
popcnt = yes
prefetch = yes
bsfq = yes
popcnt = yes
endif
ifeq ($(ARCH),x86-32)
endif
ifeq ($(ARCH),x86-32)
### 3.10 popcnt
ifeq ($(popcnt),yes)
CXXFLAGS += -DUSE_POPCNT
### 3.10 popcnt
ifeq ($(popcnt),yes)
CXXFLAGS += -DUSE_POPCNT
-### 3.11 lto.
-### Note that this is a mix of compile and link time options
-### because the lto link phase needs access to the optimization flags
-ifeq ($(lto),yes)
- CXXFLAGS += -flto
- LDFLAGS += $(CXXFLAGS) -static
+ ### For gcc we add also msse3 support and Link Time Optimization, note that
+ ### this is a mix of compile and link time options because the lto link phase
+ ### needs access to the optimization flags.
+ ifeq ($(comp),gcc)
+ CXXFLAGS += -msse3 -flto
+ LDFLAGS += $(CXXFLAGS) -static
+ endif
endif
### ==========================================================================
endif
### ==========================================================================
@echo ""
@echo "build > Build unoptimized version"
@echo "profile-build > Build PGO-optimized version"
@echo ""
@echo "build > Build unoptimized version"
@echo "profile-build > Build PGO-optimized version"
- @echo "popcnt-profile-build > Build PGO-optimized version with optional popcnt-support"
+ @echo "double-profile-build > Build PGO-optimized version with and without popcnt support"
@echo "strip > Strip executable"
@echo "install > Install executable"
@echo "clean > Clean up"
@echo "strip > Strip executable"
@echo "install > Install executable"
@echo "clean > Clean up"
@echo "Supported archs:"
@echo ""
@echo "x86-64 > x86 64-bit"
@echo "Supported archs:"
@echo ""
@echo "x86-64 > x86 64-bit"
- @echo "x86-64-modern > x86 64-bit with runtime support for popcnt-instruction"
+ @echo "x86-64-modern > x86 64-bit with runtime support for popcnt instruction"
@echo "x86-32 > x86 32-bit excluding very old hardware without SSE-support"
@echo "x86-32-old > x86 32-bit including also very old hardware"
@echo "osx-ppc-64 > PPC-Mac OS X 64 bit"
@echo "x86-32 > x86 32-bit excluding very old hardware without SSE-support"
@echo "x86-32-old > x86 32-bit including also very old hardware"
@echo "osx-ppc-64 > PPC-Mac OS X 64 bit"
@echo "Step 4/4. Deleting profile data ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_clean)
@echo "Step 4/4. Deleting profile data ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_clean)
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
@echo ""
@echo "Step 0/6. Preparing for profile build."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
@echo ""
@echo "Step 0/6. Preparing for profile build."