[llvm] 0c7af8c - [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 4 17:50:29 PDT 2020
Author: Fangrui Song
Date: 2020-08-04T17:50:06-07:00
New Revision: 0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0
URL: https://github.com/llvm/llvm-project/commit/0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0
DIFF: https://github.com/llvm/llvm-project/commit/0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0.diff
LOG: [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273
Previously the time complexity was O(|number of paths from the root to an
implied feature| * CPU_FEATURE_MAX), where CPU_FEATURE_MAX is 92.
The number of paths can be large (theoretically exponential).
For an inline asm statement, there is a code path
`clang::Parser::ParseAsmStatement -> clang::Sema::ActOnGCCAsmStmt -> ASTContext::getFunctionFeatureMap`
leading to potentially many calls of getImpliedEnabledFeatures (41 for my -march=native case).
We should improve the performance a bit in case the number of inline asm
statements is large (Linux kernel builds).
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D85257
Added:
Modified:
llvm/lib/Support/X86TargetParser.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 572d1203aaf2..c629f872df12 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -37,6 +37,10 @@ class FeatureBitset {
set(I);
}
+ bool any() const {
+ return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
+ }
+
constexpr FeatureBitset &set(unsigned I) {
// GCC <6.2 crashes if this is written in a single statement.
uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
@@ -89,6 +93,13 @@ class FeatureBitset {
Result.Bits[I] = ~Bits[I];
return Result;
}
+
+ constexpr bool operator!=(const FeatureBitset &RHS) const {
+ for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+ if (Bits[I] != RHS.Bits[I])
+ return true;
+ return false;
+ }
};
struct ProcInfo {
@@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
// For each feature that is (transitively) implied by this feature, set it.
static void getImpliedEnabledFeatures(FeatureBitset &Bits,
const FeatureBitset &Implies) {
+ // Fast path: Implies is often empty.
+ if (!Implies.any())
+ return;
+ FeatureBitset Prev;
Bits |= Implies;
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
- if (Implies[i])
- getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures);
- }
+ do {
+ Prev = Bits;
+ for (unsigned i = CPU_FEATURE_MAX; i;)
+ if (Bits[--i])
+ Bits |= FeatureInfos[i].ImpliedFeatures;
+ } while (Prev != Bits);
}
/// Create bit vector of features that are implied disabled if the feature
@@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits,
static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
// Check all features looking for any dependent on this feature. If we find
// one, mark it and recursively find any feature that depend on it.
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
- if (FeatureInfos[i].ImpliedFeatures[Value]) {
- Bits.set(i);
- getImpliedDisabledFeatures(Bits, i);
- }
- }
+ FeatureBitset Prev;
+ Bits.set(Value);
+ do {
+ Prev = Bits;
+ for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+ if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
+ Bits.set(i);
+ } while (Prev != Bits);
}
void llvm::X86::getImpliedFeatures(
More information about the llvm-commits
mailing list