[llvm] 0c7af8c - [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 4 17:50:29 PDT 2020


Author: Fangrui Song
Date: 2020-08-04T17:50:06-07:00
New Revision: 0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0

URL: https://github.com/llvm/llvm-project/commit/0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0
DIFF: https://github.com/llvm/llvm-project/commit/0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0.diff

LOG: [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273

Previously the time complexity is O(|number of paths from the root to an
implied feature| * CPU_FWATURE_MAX) where CPU_FEATURE_MAX is 92.

The number of paths can be large (theoretically exponential).

For an inline asm statement, there is a code path
`clang::Parser::ParseAsmStatement -> clang::Sema::ActOnGCCAsmStmt -> ASTContext::getFunctionFeatureMap`
leading to potentially many calls of getImpliedEnabledFeatures (41 for my -march=native case).

We should improve the performance a bit in case the number of inline asm
statements is large (Linux kernel builds).

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D85257

Added: 
    

Modified: 
    llvm/lib/Support/X86TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 572d1203aaf2..c629f872df12 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -37,6 +37,10 @@ class FeatureBitset {
       set(I);
   }
 
+  bool any() const {
+    return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
+  }
+
   constexpr FeatureBitset &set(unsigned I) {
     // GCC <6.2 crashes if this is written in a single statement.
     uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
@@ -89,6 +93,13 @@ class FeatureBitset {
       Result.Bits[I] = ~Bits[I];
     return Result;
   }
+
+  constexpr bool operator!=(const FeatureBitset &RHS) const {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+      if (Bits[I] != RHS.Bits[I])
+        return true;
+    return false;
+  }
 };
 
 struct ProcInfo {
@@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
 // For each feature that is (transitively) implied by this feature, set it.
 static void getImpliedEnabledFeatures(FeatureBitset &Bits,
                                       const FeatureBitset &Implies) {
+  // Fast path: Implies is often empty.
+  if (!Implies.any())
+    return;
+  FeatureBitset Prev;
   Bits |= Implies;
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (Implies[i])
-      getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures);
-  }
+  do {
+    Prev = Bits;
+    for (unsigned i = CPU_FEATURE_MAX; i;)
+      if (Bits[--i])
+        Bits |= FeatureInfos[i].ImpliedFeatures;
+  } while (Prev != Bits);
 }
 
 /// Create bit vector of features that are implied disabled if the feature
@@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits,
 static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
   // Check all features looking for any dependent on this feature. If we find
   // one, mark it and recursively find any feature that depend on it.
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (FeatureInfos[i].ImpliedFeatures[Value]) {
-      Bits.set(i);
-      getImpliedDisabledFeatures(Bits, i);
-    }
-  }
+  FeatureBitset Prev;
+  Bits.set(Value);
+  do {
+    Prev = Bits;
+    for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+      if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
+        Bits.set(i);
+  } while (Prev != Bits);
 }
 
 void llvm::X86::getImpliedFeatures(


        


More information about the llvm-commits mailing list