[llvm] 1322e71 - [SpecialCaseList] Add RadixTree for substring matching (#164545)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 26 22:30:42 PDT 2025
Author: Vitaly Buka
Date: 2025-10-27T05:30:38Z
New Revision: 1322e71f2baac9d7cfa77cfa5345bfffbff74cf7
URL: https://github.com/llvm/llvm-project/commit/1322e71f2baac9d7cfa77cfa5345bfffbff74cf7
DIFF: https://github.com/llvm/llvm-project/commit/1322e71f2baac9d7cfa77cfa5345bfffbff74cf7.diff
LOG: [SpecialCaseList] Add RadixTree for substring matching (#164545)
This commit adds a new RadixTree to `SpecialCaseList` for handling
substring matches. Previously, `SpecialCaseList` only supported prefix
and suffix matching. With this change, patterns that have neither
prefixes nor suffixes can now be efficiently filtered.
According to SpecialCaseListBM:
Lookup benchmarks (significant improvements):
```
OVERALL_GEOMEAN -0.7809
```
Lookup benchmarks for `*test*`-like patterns (huge improvements):
```
OVERALL_GEOMEAN -0.9947
```
https://gist.github.com/vitalybuka/ee7f681b448eb18974386ab35e2d4d27
Added:
Modified:
llvm/include/llvm/Support/SpecialCaseList.h
llvm/lib/Support/SpecialCaseList.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 860f73c798e41..cb8e568de02e0 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -170,6 +170,10 @@ class SpecialCaseList {
RadixTree<iterator_range<StringRef::const_reverse_iterator>,
SmallVector<const GlobMatcher::Glob *, 1>>>
PrefixSuffixToGlob;
+
+ RadixTree<iterator_range<StringRef::const_iterator>,
+ SmallVector<const GlobMatcher::Glob *, 1>>
+ SubstrToGlob;
};
/// Represents a set of patterns and their line numbers
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 3a9718569a06f..246d90cce3a43 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -94,6 +94,19 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
StringRef Prefix = G.Pattern.prefix();
StringRef Suffix = G.Pattern.suffix();
+ if (Suffix.empty() && Prefix.empty()) {
+ // If both prefix and suffix are empty, put the glob into a special tree to
+ // search by a substring in the middle.
+ StringRef Substr = G.Pattern.longest_substr();
+ if (!Substr.empty()) {
+ // But only if substring is not empty. Searching this tree is more
+ // expensive.
+ auto &V = SubstrToGlob.emplace(Substr).first->second;
+ V.emplace_back(&G);
+ continue;
+ }
+ }
+
auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
V.emplace_back(&G);
@@ -119,6 +132,25 @@ void SpecialCaseList::GlobMatcher::match(
}
}
}
+
+ if (!SubstrToGlob.empty()) {
+ // As we don't know exactly where the substring starts, we will try all
+ // possibilities. In most cases the search will fail on the first characters.
+ for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
+ for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
+ for (const auto *G : V) {
+ if (G->Pattern.match(Query)) {
+ Cb(G->Name, G->LineNo);
+ // As soon as we find a match in the vector, we can break for this
+ // vector, since the globs are already sorted by priority within the
+ // substring group. However, we continue searching other substring groups
+ // in the tree, as they may contain a better match overall.
+ break;
+ }
+ }
+ }
+ }
+ }
}
SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
More information about the llvm-commits
mailing list