[llvm] 168db5e - [SpecialCaseList] Filtering Globs with matching prefix (#164531)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 22:59:29 PDT 2025
Author: Vitaly Buka
Date: 2025-10-25T05:59:24Z
New Revision: 168db5eca0da25b92a105c8df5c55ed7fd2d4cfc
URL: https://github.com/llvm/llvm-project/commit/168db5eca0da25b92a105c8df5c55ed7fd2d4cfc
DIFF: https://github.com/llvm/llvm-project/commit/168db5eca0da25b92a105c8df5c55ed7fd2d4cfc.diff
LOG: [SpecialCaseList] Filtering Globs with matching prefix (#164531)
This commit optimizes `SpecialCaseList` by using a `RadixTree` to filter
glob patterns by their prefixes. When matching a query, the `RadixTree`
quickly identifies every glob pattern whose prefix is itself a prefix of
the query. This significantly reduces the number of glob patterns that
need to be fully evaluated, which improves performance, especially when
dealing with a large number of patterns.
According to SpecialCaseListBM:
Lookup benchmarks (significant improvements):
```
OVERALL_GEOMEAN -0.8177
```
Benchmarks for lookups against `prefix*`-style patterns (huge improvements):
```
OVERALL_GEOMEAN -0.9819
```
https://gist.github.com/vitalybuka/824884bcbc1713e815068c279159dafe
---------
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
Added:
Modified:
llvm/include/llvm/Support/SpecialCaseList.h
llvm/lib/Support/SpecialCaseList.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index ead765562504d..a235975b152c3 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -13,7 +13,10 @@
#define LLVM_SUPPORT_SPECIALCASELIST_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/RadixTree.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GlobPattern.h"
@@ -162,6 +165,10 @@ class SpecialCaseList {
};
std::vector<GlobMatcher::Glob> Globs;
+
+ RadixTree<iterator_range<StringRef::const_iterator>,
+ SmallVector<const GlobMatcher::Glob *, 1>>
+ PrefixToGlob;
};
/// Represents a set of patterns and their line numbers
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index f74e52a3a7fa9..c27f627446203 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -89,14 +89,32 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
return A.Name.size() < B.Name.size();
});
}
+
+ for (const auto &G : reverse(Globs)) {
+ StringRef Prefix = G.Pattern.prefix();
+
+ auto &V = PrefixToGlob.emplace(Prefix).first->second;
+ V.emplace_back(&G);
+ }
}
void SpecialCaseList::GlobMatcher::match(
StringRef Query,
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
- for (const auto &G : reverse(Globs))
- if (G.Pattern.match(Query))
- return Cb(G.Name, G.LineNo);
+ if (!PrefixToGlob.empty()) {
+ for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) {
+ for (const auto *G : V) {
+ if (G->Pattern.match(Query)) {
+ Cb(G->Name, G->LineNo);
+ // As soon as we find a match in the vector, we can break for this
+ // vector, since the globs are already sorted by priority within the
+ // prefix group. However, we continue searching other prefix groups in
+ // the map, as they may contain a better match overall.
+ break;
+ }
+ }
+ }
+ }
}
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
More information about the llvm-commits mailing list