[cfe-commits] [PATCH] Clean up and fix X86 features

Jung-uk Kim jkim at FreeBSD.org
Thu Nov 15 10:41:40 PST 2012


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

[This is actually PR14344 but I was told to submit the patch here.
Please see the PR14344 for the PR history.]

I have an AMD Family 10h processor and I realized that LZCNT and
POPCNT are not enabled by default.  Then, I looked at
lib/Basic/Targets.cpp and found it needs some love (e.g., sync. with
LLVM's X86.td). :-)

Please see the attached patch.

- - AMD Barcelona("amdfam10") and later processors have LZCNT and POPCNT
instructions.
- - AMD Piledriver("bdver2") and later processors have BMI, FMA, and
F16C instructions.
- - Intel Ivy Bridge("core-avx-i") and later processors have F16C
instructions.
- - Do not set SIMD sets (i.e., MMX, SSE3, and AVX) when they are
implicitly set via higher instruction sets.
- - Do not enable POPCNT instruction with SSE4* instruction sets as they
are not part of the specifications nor uses SIMD registers. [1]

Jung-uk Kim

[1] It has little bit of history behind it.  AMD called them ABM and
they added "-mabm" option to GCC, which enabled both LZCNT and POPCNT
instructions.  OTOH, Intel implemented POPCNT first (Arrandale), then
LZCNT later (upcoming Haswell).  Therefore, GCC had to separate the
flag and added "-mpopcnt" and "-mlzcnt" (but kept "-mabm" for backward
compatibility).

http://en.wikipedia.org/wiki/SSE4
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)
Comment: Using GnuPG with Mozilla - http://www.enigmail.net/

iEYEARECAAYFAlClN2QACgkQmlay1b9qnVMvagCbBDixUqMhql7mMskysksY+lQI
pr4AoNNvKolAUplLnc98ijaOfLNOM10e
=Wty7
-----END PGP SIGNATURE-----
-------------- next part --------------
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1854,58 +1854,54 @@
     break;
   case CK_Pentium3:
   case CK_Pentium3M:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse", true);
     break;
   case CK_PentiumM:
   case CK_Pentium4:
   case CK_Pentium4M:
   case CK_x86_64:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse2", true);
     break;
   case CK_Yonah:
   case CK_Prescott:
   case CK_Nocona:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse3", true);
     break;
   case CK_Core2:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "ssse3", true);
     break;
   case CK_Penryn:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse4.1", true);
     break;
   case CK_Atom:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "ssse3", true);
     break;
   case CK_Corei7:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse4", true);
+    setFeatureEnabled(Features, "popcnt", true);
     break;
   case CK_Corei7AVX:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "avx", true);
+    setFeatureEnabled(Features, "popcnt", true);
     setFeatureEnabled(Features, "aes", true);
     setFeatureEnabled(Features, "pclmul", true);
     break;
   case CK_CoreAVXi:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "avx", true);
+    setFeatureEnabled(Features, "popcnt", true);
     setFeatureEnabled(Features, "aes", true);
     setFeatureEnabled(Features, "pclmul", true);
     setFeatureEnabled(Features, "rdrnd", true);
+    setFeatureEnabled(Features, "f16c", true);
     break;
   case CK_CoreAVX2:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "avx2", true);
+    setFeatureEnabled(Features, "popcnt", true);
     setFeatureEnabled(Features, "aes", true);
     setFeatureEnabled(Features, "pclmul", true);
     setFeatureEnabled(Features, "lzcnt", true);
     setFeatureEnabled(Features, "rdrnd", true);
+    setFeatureEnabled(Features, "f16c", true);
     setFeatureEnabled(Features, "bmi", true);
     setFeatureEnabled(Features, "bmi2", true);
     setFeatureEnabled(Features, "rtm", true);
@@ -1946,23 +1942,35 @@
     setFeatureEnabled(Features, "3dnowa", true);
     break;
   case CK_AMDFAM10:
-    setFeatureEnabled(Features, "sse3", true);
     setFeatureEnabled(Features, "sse4a", true);
     setFeatureEnabled(Features, "3dnowa", true);
+    setFeatureEnabled(Features, "lzcnt", true);
+    setFeatureEnabled(Features, "popcnt", true);
     break;
   case CK_BTVER1:
     setFeatureEnabled(Features, "ssse3", true);
     setFeatureEnabled(Features, "sse4a", true);
+    setFeatureEnabled(Features, "lzcnt", true);
+    setFeatureEnabled(Features, "popcnt", true);
     break;
   case CK_BDVER1:
+    setFeatureEnabled(Features, "xop", true);
+    setFeatureEnabled(Features, "lzcnt", true);
+    setFeatureEnabled(Features, "popcnt", true);
+    setFeatureEnabled(Features, "aes", true);
+    setFeatureEnabled(Features, "pclmul", true);
+    break;
   case CK_BDVER2:
-    setFeatureEnabled(Features, "avx", true);
     setFeatureEnabled(Features, "xop", true);
+    setFeatureEnabled(Features, "lzcnt", true);
+    setFeatureEnabled(Features, "popcnt", true);
     setFeatureEnabled(Features, "aes", true);
     setFeatureEnabled(Features, "pclmul", true);
+    setFeatureEnabled(Features, "bmi", true);
+    setFeatureEnabled(Features, "fma", true);
+    setFeatureEnabled(Features, "f16c", true);
     break;
   case CK_C3_2:
-    setFeatureEnabled(Features, "mmx", true);
     setFeatureEnabled(Features, "sse", true);
     break;
   }
@@ -1995,8 +2003,7 @@
         Features["ssse3"] = true;
     else if (Name == "sse4" || Name == "sse4.2")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
-        Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = true;
+        Features["ssse3"] = Features["sse41"] = Features["sse42"] = true;
     else if (Name == "sse4.1")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = true;
@@ -2011,25 +2018,24 @@
     else if (Name == "avx")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = Features["avx"] = true;
+        Features["avx"] = true;
     else if (Name == "avx2")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = Features["avx"] = Features["avx2"] = true;
+        Features["avx"] = Features["avx2"] = true;
     else if (Name == "fma")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = Features["avx"] = Features["fma"] = true;
+        Features["avx"] = Features["fma"] = true;
     else if (Name == "fma4")
-        Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
+      Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = Features["avx"] = Features["sse4a"] =
-        Features["fma4"] = true;
+        Features["avx"] = Features["sse4a"] = Features["fma4"] = true;
     else if (Name == "xop")
-        Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
+      Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["ssse3"] = Features["sse41"] = Features["sse42"] =
-        Features["popcnt"] = Features["avx"] = Features["sse4a"] =
-        Features["fma4"] = Features["xop"] = true;
+        Features["avx"] = Features["sse4a"] = Features["fma4"] =
+        Features["xop"] = true;
     else if (Name == "sse4a")
       Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] =
         Features["sse4a"] = true;


More information about the cfe-commits mailing list