From 8f934dff9a8644c38aaa31fc0ae90c0fab30934a Mon Sep 17 00:00:00 2001 From: Krgp Date: Sat, 30 Apr 2016 16:08:14 +0530 Subject: [PATCH] Remove useless -mbmi flag in Makefile I could not find any documentation stating that prepending -mbmi to -mbmi2 is necessary or gives any benefit. Instead, https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html#x86-Built-in-Functions states: The following built-in functions are available when -mbmi is used. All of them generate the machine instruction that is part of the name. unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int); unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long); The following built-in functions are available when -mbmi2 is used. All of them generate the machine instruction that is part of the name. unsigned int _bzhi_u32 (unsigned int, unsigned int) unsigned int _pdep_u32 (unsigned int, unsigned int) unsigned int _pext_u32 (unsigned int, unsigned int) unsigned long long _bzhi_u64 (unsigned long long, unsigned long long) unsigned long long _pdep_u64 (unsigned long long, unsigned long long) unsigned long long _pext_u64 (unsigned long long, unsigned long long) and https://gcc.gnu.org/ml/gcc/2014-02/msg00204.html says: "... The real optimization comes from being able to use pext (parallel bit extract), which can implement several bextr expressions in parallel." Apart from that, we don't pass all of -msse -msse2 -msse3 -msse4.2 etc., but just -msse3 (or -msse4.2). As regards the speedup, it is within noise level: this pull request is actually a reversal of mcostalba#198, wherein prepending -mbmi to -mbmi2 was claimed to be 0.3% faster, whereas here removing -mbmi gives a 0.4% speed gain. 
--- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a7d749a4..cb55c8b0 100644 --- a/src/Makefile +++ b/src/Makefile @@ -309,7 +309,7 @@ endif ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mbmi -mbmi2 + CXXFLAGS += -mbmi2 endif endif -- 2.39.2