[llvm] 1322e71 - [SpecialCaseList] Add RadixTree for substring matching (#164545)

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 26 22:30:42 PDT 2025


Author: Vitaly Buka
Date: 2025-10-27T05:30:38Z
New Revision: 1322e71f2baac9d7cfa77cfa5345bfffbff74cf7

URL: https://github.com/llvm/llvm-project/commit/1322e71f2baac9d7cfa77cfa5345bfffbff74cf7
DIFF: https://github.com/llvm/llvm-project/commit/1322e71f2baac9d7cfa77cfa5345bfffbff74cf7.diff

LOG: [SpecialCaseList] Add RadixTree for substring matching (#164545)

This commit adds a new RadixTree to `SpecialCaseList` for handling
substring matches. Previously, `SpecialCaseList` only supported prefix
and suffix matching. With this change, patterns that have neither a
prefix nor a suffix (e.g. `*test*`) can now be efficiently filtered by
their longest literal substring.

According to SpecialCaseListBM:

Lookup benchmarks (significant improvements):
```
OVERALL_GEOMEAN                       -0.7809
```

Lookup benchmarks for `*test*`-like patterns (huge improvements):
```
OVERALL_GEOMEAN                       -0.9947
```

https://gist.github.com/vitalybuka/ee7f681b448eb18974386ab35e2d4d27

Added: 
    

Modified: 
    llvm/include/llvm/Support/SpecialCaseList.h
    llvm/lib/Support/SpecialCaseList.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 860f73c798e41..cb8e568de02e0 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -170,6 +170,10 @@ class SpecialCaseList {
               RadixTree<iterator_range<StringRef::const_reverse_iterator>,
                         SmallVector<const GlobMatcher::Glob *, 1>>>
         PrefixSuffixToGlob;
+
+    RadixTree<iterator_range<StringRef::const_iterator>,
+              SmallVector<const GlobMatcher::Glob *, 1>>
+        SubstrToGlob;
   };
 
   /// Represents a set of patterns and their line numbers

diff  --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 3a9718569a06f..246d90cce3a43 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -94,6 +94,19 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
     StringRef Prefix = G.Pattern.prefix();
     StringRef Suffix = G.Pattern.suffix();
 
+    if (Suffix.empty() && Prefix.empty()) {
+      // If both prefix and suffix are empty put into special tree to search by
+      // substring in a middle.
+      StringRef Substr = G.Pattern.longest_substr();
+      if (!Substr.empty()) {
+        // But only if substring is not empty. Searching this tree is more
+        // expensive.
+        auto &V = SubstrToGlob.emplace(Substr).first->second;
+        V.emplace_back(&G);
+        continue;
+      }
+    }
+
     auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
     auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
     V.emplace_back(&G);
@@ -119,6 +132,25 @@ void SpecialCaseList::GlobMatcher::match(
       }
     }
   }
+
+  if (!SubstrToGlob.empty()) {
+    // As we don't know when substring exactly starts, we will try all
+    // possibilities. In most cases search will fail on first characters.
+    for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
+      for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
+        for (const auto *G : V) {
+          if (G->Pattern.match(Query)) {
+            Cb(G->Name, G->LineNo);
+            // As soon as we find a match in the vector, we can break for this
+            // vector, since the globs are already sorted by priority within the
+            // prefix group. However, we continue searching other prefix groups
+            // in the map, as they may contain a better match overall.
+            break;
+          }
+        }
+      }
+    }
+  }
 }
 
 SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)


        


More information about the llvm-commits mailing list