[llvm] a2723dd - [NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher (#162303)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 8 15:26:17 PDT 2025


Author: Vitaly Buka
Date: 2025-10-08T15:26:13-07:00
New Revision: a2723ddf70aa0bfa1758e47a9d12238141e44f86

URL: https://github.com/llvm/llvm-project/commit/a2723ddf70aa0bfa1758e47a9d12238141e44f86
DIFF: https://github.com/llvm/llvm-project/commit/a2723ddf70aa0bfa1758e47a9d12238141e44f86.diff

LOG: [NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher (#162303)

Glob will be optimized.
Regex we will keep intact.

Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move in the future.

Added: 
    

Modified: 
    llvm/include/llvm/Support/SpecialCaseList.h
    llvm/lib/Support/SpecialCaseList.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index c7a10e34618d7..27daa1f4c6116 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -20,6 +20,7 @@
 #include <memory>
 #include <string>
 #include <utility>
+#include <variant>
 #include <vector>
 
 namespace llvm {
@@ -120,20 +121,33 @@ class SpecialCaseList {
   SpecialCaseList &operator=(SpecialCaseList const &) = delete;
 
 private:
-  /// Represents a set of globs and their line numbers
-  class Matcher {
+  // Legacy v1 matcher.
+  class RegexMatcher {
   public:
-    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
-                          bool UseRegex);
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
     LLVM_ABI void
     match(StringRef Query,
           llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
 
-    LLVM_ABI bool matchAny(StringRef Query) const {
-      bool R = false;
-      match(Query, [&](StringRef, unsigned) { R = true; });
-      return R;
-    }
+    struct Reg {
+      Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
+          : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
+      std::string Name;
+      unsigned LineNo;
+      Regex Rg;
+      Reg(Reg &&) = delete;
+      Reg() = default;
+    };
+
+    std::vector<std::unique_ptr<Reg>> RegExes;
+  };
+
+  class GlobMatcher {
+  public:
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+    LLVM_ABI void
+    match(StringRef Query,
+          llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
 
     struct Glob {
       Glob(StringRef Name, unsigned LineNo) : Name(Name), LineNo(LineNo) {}
@@ -146,27 +160,37 @@ class SpecialCaseList {
       Glob() = default;
     };
 
-    struct Reg {
-      Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
-          : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
-      std::string Name;
-      unsigned LineNo;
-      Regex Rg;
-      Reg(Reg &&) = delete;
-      Reg() = default;
-    };
+    std::vector<std::unique_ptr<Glob>> Globs;
+  };
 
-    std::vector<std::unique_ptr<Matcher::Glob>> Globs;
-    std::vector<std::unique_ptr<Reg>> RegExes;
-    bool RemoveDotSlash = false;
+  /// Represents a set of patterns and their line numbers
+  class Matcher {
+  public:
+    LLVM_ABI Matcher(bool UseGlobs, bool RemoveDotSlash);
+
+    LLVM_ABI void
+    match(StringRef Query,
+          llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
+
+    LLVM_ABI bool matchAny(StringRef Query) const {
+      bool R = false;
+      match(Query, [&](StringRef, unsigned) { R = true; });
+      return R;
+    }
+
+    LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+
+    std::variant<RegexMatcher, GlobMatcher> M;
+    bool RemoveDotSlash;
   };
 
   using SectionEntries = StringMap<StringMap<Matcher>>;
 
 protected:
   struct Section {
-    Section(StringRef Str, unsigned FileIdx)
-        : SectionStr(Str), FileIdx(FileIdx) {};
+    Section(StringRef Str, unsigned FileIdx, bool UseGlobs)
+        : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false), SectionStr(Str),
+          FileIdx(FileIdx) {}
 
     Section(Section &&) = default;
 
@@ -197,7 +221,7 @@ class SpecialCaseList {
 
   LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
                                           unsigned FileIdx, unsigned LineNo,
-                                          bool UseGlobs = true);
+                                          bool UseGlobs);
 
   /// Parses just-constructed SpecialCaseList entries from a memory buffer.
   LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,

diff  --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index f184efa433f3d..8ec6b1d46aeff 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -30,57 +30,82 @@
 
 namespace llvm {
 
-Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
-                                       bool UseGlobs) {
+Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
+                                            unsigned LineNumber) {
   if (Pattern.empty())
     return createStringError(errc::invalid_argument,
-                             Twine("Supplied ") +
-                                 (UseGlobs ? "glob" : "regex") + " was blank");
-
-  if (!UseGlobs) {
-    // Replace * with .*
-    auto Regexp = Pattern.str();
-    for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
-         pos += strlen(".*")) {
-      Regexp.replace(pos, strlen("*"), ".*");
-    }
+                             "Supplied regex was blank");
+
+  // Replace * with .*
+  auto Regexp = Pattern.str();
+  for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+       pos += strlen(".*")) {
+    Regexp.replace(pos, strlen("*"), ".*");
+  }
 
-    Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+  Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
 
-    // Check that the regexp is valid.
-    Regex CheckRE(Regexp);
-    std::string REError;
-    if (!CheckRE.isValid(REError))
-      return createStringError(errc::invalid_argument, REError);
+  // Check that the regexp is valid.
+  Regex CheckRE(Regexp);
+  std::string REError;
+  if (!CheckRE.isValid(REError))
+    return createStringError(errc::invalid_argument, REError);
 
-    auto Rg =
-        std::make_unique<Matcher::Reg>(Pattern, LineNumber, std::move(CheckRE));
-    RegExes.emplace_back(std::move(Rg));
+  auto Rg = std::make_unique<Reg>(Pattern, LineNumber, std::move(CheckRE));
+  RegExes.emplace_back(std::move(Rg));
 
-    return Error::success();
-  }
+  return Error::success();
+}
+
+void SpecialCaseList::RegexMatcher::match(
+    StringRef Query,
+    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+  for (const auto &Regex : reverse(RegExes))
+    if (Regex->Rg.match(Query))
+      Cb(Regex->Name, Regex->LineNo);
+}
+
+Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
+                                           unsigned LineNumber) {
+  if (Pattern.empty())
+    return createStringError(errc::invalid_argument, "Supplied glob was blank");
 
-  auto Glob = std::make_unique<Matcher::Glob>(Pattern, LineNumber);
+  auto G = std::make_unique<Glob>(Pattern, LineNumber);
   // We must be sure to use the string in `Glob` rather than the provided
   // reference which could be destroyed before match() is called
-  if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
-                     .moveInto(Glob->Pattern))
+  if (auto Err = GlobPattern::create(G->Name, /*MaxSubPatterns=*/1024)
+                     .moveInto(G->Pattern))
     return Err;
-  Globs.push_back(std::move(Glob));
+  Globs.emplace_back(std::move(G));
   return Error::success();
 }
 
-void SpecialCaseList::Matcher::match(
+void SpecialCaseList::GlobMatcher::match(
     StringRef Query,
     llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
-  if (RemoveDotSlash)
-    Query = llvm::sys::path::remove_leading_dotslash(Query);
   for (const auto &Glob : reverse(Globs))
     if (Glob->Pattern.match(Query))
       Cb(Glob->Name, Glob->LineNo);
-  for (const auto &Regex : reverse(RegExes))
-    if (Regex->Rg.match(Query))
-      Cb(Regex->Name, Regex->LineNo);
+}
+
+SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
+    : RemoveDotSlash(RemoveDotSlash) {
+  if (UseGlobs)
+    M.emplace<GlobMatcher>();
+  else
+    M.emplace<RegexMatcher>();
+}
+
+void SpecialCaseList::Matcher::match(
+    StringRef Query,
+    llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+  if (RemoveDotSlash)
+    Query = llvm::sys::path::remove_leading_dotslash(Query);
+  return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
+}
+
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
+  return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
 }
 
 // TODO: Refactor this to return Expected<...>
@@ -139,10 +164,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
 Expected<SpecialCaseList::Section *>
 SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
                             unsigned LineNo, bool UseGlobs) {
-  Sections.emplace_back(SectionStr, FileNo);
+  Sections.emplace_back(SectionStr, FileNo, UseGlobs);
   auto &Section = Sections.back();
 
-  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
+  if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
     return createStringError(errc::invalid_argument,
                              "malformed section at line " + Twine(LineNo) +
                                  ": '" + SectionStr +
@@ -170,7 +195,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
   bool RemoveDotSlash = Version > 2;
 
   Section *CurrentSection;
-  if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
+  if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
     Error = toString(std::move(Err));
     return false;
   }
@@ -213,10 +238,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
     }
 
     auto [Pattern, Category] = Postfix.split("=");
-    auto &Entry = CurrentSection->Entries[Prefix][Category];
-    Entry.RemoveDotSlash =
-        RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix);
-    if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+    auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
+        Category, UseGlobs,
+        RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
+    if (auto Err = It->second.insert(Pattern, LineNo)) {
       Error =
           (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
            Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))


        


More information about the llvm-commits mailing list