[cfe-commits] [PATCH] Clean up and fix X86 features
Jung-uk Kim
jkim at FreeBSD.org
Thu Nov 15 10:41:40 PST 2012
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
[This is actually PR14344 but I was told to submit the patch here.
Please see the PR14344 for the PR history.]
I have an AMD Family 10h processor and I realized that LZCNT and
POPCNT are not enabled by default. Then, I looked at
lib/Basic/Targets.cpp and found it needs some love (e.g., sync. with
LLVM's X86.td). :-)
Please see the attached patch.
- - AMD Barcelona("amdfam10") and later processors have LZCNT and POPCNT
instructions.
- - AMD Piledriver("bdver2") and later processors have BMI, FMA, and
F16C instructions.
- - Intel Ivy Bridge("core-avx-i") and later processors have F16C
instructions.
- - Do not set SIMD sets (i.e., MMX, SSE3, and AVX) when they are
implicitly set via higher instruction sets.
- - Do not enable POPCNT instruction with SSE4* instruction sets as they
are not part of the specifications nor uses SIMD registers. [1]
Jung-uk Kim
[1] It has little bit of history behind it. AMD called them ABM and
they added "-mabm" option to GCC, which enabled both LZCNT and POPCNT
instructions. OTOH, Intel implemented POPCNT first (Arrandale), then
LZCNT later (upcoming Haswell). Therefore, GCC had to separate the
flag and added "-mpopcnt" and "-mlzcnt" (but kept "-mabm" for backward
compatibility).
http://en.wikipedia.org/wiki/SSE4
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)
Comment: Using GnuPG with Mozilla - http://www.enigmail.net/
iEYEARECAAYFAlClN2QACgkQmlay1b9qnVMvagCbBDixUqMhql7mMskysksY+lQI
pr4AoNNvKolAUplLnc98ijaOfLNOM10e
=Wty7
-----END PGP SIGNATURE-----
-------------- next part --------------
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1854,58 +1854,54 @@
break;
case CK_Pentium3:
case CK_Pentium3M:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse", true);
break;
case CK_PentiumM:
case CK_Pentium4:
case CK_Pentium4M:
case CK_x86_64:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse2", true);
break;
case CK_Yonah:
case CK_Prescott:
case CK_Nocona:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse3", true);
break;
case CK_Core2:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "ssse3", true);
break;
case CK_Penryn:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse4.1", true);
break;
case CK_Atom:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "ssse3", true);
break;
case CK_Corei7:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse4", true);
+ setFeatureEnabled(Features, "popcnt", true);
break;
case CK_Corei7AVX:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "avx", true);
+ setFeatureEnabled(Features, "popcnt", true);
setFeatureEnabled(Features, "aes", true);
setFeatureEnabled(Features, "pclmul", true);
break;
case CK_CoreAVXi:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "avx", true);
+ setFeatureEnabled(Features, "popcnt", true);
setFeatureEnabled(Features, "aes", true);
setFeatureEnabled(Features, "pclmul", true);
setFeatureEnabled(Features, "rdrnd", true);
+ setFeatureEnabled(Features, "f16c", true);
break;
case CK_CoreAVX2:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "avx2", true);
+ setFeatureEnabled(Features, "popcnt", true);
setFeatureEnabled(Features, "aes", true);
setFeatureEnabled(Features, "pclmul", true);
setFeatureEnabled(Features, "lzcnt", true);
setFeatureEnabled(Features, "rdrnd", true);
+ setFeatureEnabled(Features, "f16c", true);
setFeatureEnabled(Features, "bmi", true);
setFeatureEnabled(Features, "bmi2", true);
setFeatureEnabled(Features, "rtm", true);
@@ -1946,23 +1942,35 @@
setFeatureEnabled(Features, "3dnowa", true);
break;
case CK_AMDFAM10:
- setFeatureEnabled(Features, "sse3", true);
setFeatureEnabled(Features, "sse4a", true);
setFeatureEnabled(Features, "3dnowa", true);
+ setFeatureEnabled(Features, "lzcnt", true);
+ setFeatureEnabled(Features, "popcnt", true);
break;
case CK_BTVER1:
setFeatureEnabled(Features, "ssse3", true);
setFeatureEnabled(Features, "sse4a", true);
+ setFeatureEnabled(Features, "lzcnt", true);
+ setFeatureEnabled(Features, "popcnt", true);
break;
case CK_BDVER1:
+ setFeatureEnabled(Features, "xop", true);
+ setFeatureEnabled(Features, "lzcnt", true);
+ setFeatureEnabled(Features, "popcnt", true);
+ setFeatureEnabled(Features, "aes", true);
+ setFeatureEnabled(Features, "pclmul", true);
+ break;
case CK_BDVER2:
- setFeatureEnabled(Features, "avx", true);
setFeatureEnabled(Features, "xop", true);
+ setFeatureEnabled(Features, "lzcnt", true);
+ setFeatureEnabled(Features, "popcnt", true);
setFeatureEnabled(Features, "aes", true);
setFeatureEnabled(Features, "pclmul", true);
+ setFeatureEnabled(Features, "bmi", true);
+ setFeatureEnabled(Features, "fma", true);
+ setFeatureEnabled(Features, "f16c", true);
break;
case CK_C3_2:
- setFeatureEnabled(Features, "mmx", true);
setFeatureEnabled(Features, "sse", true);
break;
}
@@ -1995,8 +2003,7 @@
Features["ssse3"] = true;
else if (Name == "sse4" || Name == "sse4.2")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
- Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = true;
+ Features["ssse3"] = Features["sse41"] = Features["sse42"] = true;
else if (Name == "sse4.1")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = true;
@@ -2011,25 +2018,24 @@
else if (Name == "avx")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = Features["avx"] = true;
+ Features["avx"] = true;
else if (Name == "avx2")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = Features["avx"] = Features["avx2"] = true;
+ Features["avx"] = Features["avx2"] = true;
else if (Name == "fma")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = Features["avx"] = Features["fma"] = true;
+ Features["avx"] = Features["fma"] = true;
else if (Name == "fma4")
- Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
+ Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = Features["avx"] = Features["sse4a"] =
- Features["fma4"] = true;
+ Features["avx"] = Features["sse4a"] = Features["fma4"] = true;
else if (Name == "xop")
- Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
+ Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["ssse3"] = Features["sse41"] = Features["sse42"] =
- Features["popcnt"] = Features["avx"] = Features["sse4a"] =
- Features["fma4"] = Features["xop"] = true;
+ Features["avx"] = Features["sse4a"] = Features["fma4"] =
+ Features["xop"] = true;
else if (Name == "sse4a")
Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
Features["sse4a"] = true;
More information about the cfe-commits
mailing list