[llvm] a2723dd - [NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher (#162303)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 15:26:17 PDT 2025
Author: Vitaly Buka
Date: 2025-10-08T15:26:13-07:00
New Revision: a2723ddf70aa0bfa1758e47a9d12238141e44f86
URL: https://github.com/llvm/llvm-project/commit/a2723ddf70aa0bfa1758e47a9d12238141e44f86
DIFF: https://github.com/llvm/llvm-project/commit/a2723ddf70aa0bfa1758e47a9d12238141e44f86.diff
LOG: [NFC][SpecialCaseList] Split Matcher into RegexMatcher and GlobMatcher (#162303)
Glob will be optimized.
Regex we will keep intact.
Using std::variant to avoid virtual methods,
and to allow switching from unique_ptr to move semantics in the future.
Added:
Modified:
llvm/include/llvm/Support/SpecialCaseList.h
llvm/lib/Support/SpecialCaseList.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index c7a10e34618d7..27daa1f4c6116 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -120,20 +121,33 @@ class SpecialCaseList {
SpecialCaseList &operator=(SpecialCaseList const &) = delete;
private:
- /// Represents a set of globs and their line numbers
- class Matcher {
+ // Legacy v1 matcher.
+ class RegexMatcher {
public:
- LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
- bool UseRegex);
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
LLVM_ABI void
match(StringRef Query,
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
- LLVM_ABI bool matchAny(StringRef Query) const {
- bool R = false;
- match(Query, [&](StringRef, unsigned) { R = true; });
- return R;
- }
+ struct Reg {
+ Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
+ : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
+ std::string Name;
+ unsigned LineNo;
+ Regex Rg;
+ Reg(Reg &&) = delete;
+ Reg() = default;
+ };
+
+ std::vector<std::unique_ptr<Reg>> RegExes;
+ };
+
+ class GlobMatcher {
+ public:
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+ LLVM_ABI void
+ match(StringRef Query,
+ llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
struct Glob {
Glob(StringRef Name, unsigned LineNo) : Name(Name), LineNo(LineNo) {}
@@ -146,27 +160,37 @@ class SpecialCaseList {
Glob() = default;
};
- struct Reg {
- Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
- : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
- std::string Name;
- unsigned LineNo;
- Regex Rg;
- Reg(Reg &&) = delete;
- Reg() = default;
- };
+ std::vector<std::unique_ptr<Glob>> Globs;
+ };
- std::vector<std::unique_ptr<Matcher::Glob>> Globs;
- std::vector<std::unique_ptr<Reg>> RegExes;
- bool RemoveDotSlash = false;
+ /// Represents a set of patterns and their line numbers
+ class Matcher {
+ public:
+ LLVM_ABI Matcher(bool UseGlobs, bool RemoveDotSlash);
+
+ LLVM_ABI void
+ match(StringRef Query,
+ llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const;
+
+ LLVM_ABI bool matchAny(StringRef Query) const {
+ bool R = false;
+ match(Query, [&](StringRef, unsigned) { R = true; });
+ return R;
+ }
+
+ LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber);
+
+ std::variant<RegexMatcher, GlobMatcher> M;
+ bool RemoveDotSlash;
};
using SectionEntries = StringMap<StringMap<Matcher>>;
protected:
struct Section {
- Section(StringRef Str, unsigned FileIdx)
- : SectionStr(Str), FileIdx(FileIdx) {};
+ Section(StringRef Str, unsigned FileIdx, bool UseGlobs)
+ : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false), SectionStr(Str),
+ FileIdx(FileIdx) {}
Section(Section &&) = default;
@@ -197,7 +221,7 @@ class SpecialCaseList {
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
unsigned FileIdx, unsigned LineNo,
- bool UseGlobs = true);
+ bool UseGlobs);
/// Parses just-constructed SpecialCaseList entries from a memory buffer.
LLVM_ABI bool parse(unsigned FileIdx, const MemoryBuffer *MB,
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index f184efa433f3d..8ec6b1d46aeff 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -30,57 +30,82 @@
namespace llvm {
-Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
- bool UseGlobs) {
+Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
+ unsigned LineNumber) {
if (Pattern.empty())
return createStringError(errc::invalid_argument,
- Twine("Supplied ") +
- (UseGlobs ? "glob" : "regex") + " was blank");
-
- if (!UseGlobs) {
- // Replace * with .*
- auto Regexp = Pattern.str();
- for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
- pos += strlen(".*")) {
- Regexp.replace(pos, strlen("*"), ".*");
- }
+ "Supplied regex was blank");
+
+ // Replace * with .*
+ auto Regexp = Pattern.str();
+ for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
+ pos += strlen(".*")) {
+ Regexp.replace(pos, strlen("*"), ".*");
+ }
- Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
+ Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
- // Check that the regexp is valid.
- Regex CheckRE(Regexp);
- std::string REError;
- if (!CheckRE.isValid(REError))
- return createStringError(errc::invalid_argument, REError);
+ // Check that the regexp is valid.
+ Regex CheckRE(Regexp);
+ std::string REError;
+ if (!CheckRE.isValid(REError))
+ return createStringError(errc::invalid_argument, REError);
- auto Rg =
- std::make_unique<Matcher::Reg>(Pattern, LineNumber, std::move(CheckRE));
- RegExes.emplace_back(std::move(Rg));
+ auto Rg = std::make_unique<Reg>(Pattern, LineNumber, std::move(CheckRE));
+ RegExes.emplace_back(std::move(Rg));
- return Error::success();
- }
+ return Error::success();
+}
+
+void SpecialCaseList::RegexMatcher::match(
+ StringRef Query,
+ llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+ for (const auto &Regex : reverse(RegExes))
+ if (Regex->Rg.match(Query))
+ Cb(Regex->Name, Regex->LineNo);
+}
+
+Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
+ unsigned LineNumber) {
+ if (Pattern.empty())
+ return createStringError(errc::invalid_argument, "Supplied glob was blank");
- auto Glob = std::make_unique<Matcher::Glob>(Pattern, LineNumber);
+ auto G = std::make_unique<Glob>(Pattern, LineNumber);
// We must be sure to use the string in `Glob` rather than the provided
// reference which could be destroyed before match() is called
- if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
- .moveInto(Glob->Pattern))
+ if (auto Err = GlobPattern::create(G->Name, /*MaxSubPatterns=*/1024)
+ .moveInto(G->Pattern))
return Err;
- Globs.push_back(std::move(Glob));
+ Globs.emplace_back(std::move(G));
return Error::success();
}
-void SpecialCaseList::Matcher::match(
+void SpecialCaseList::GlobMatcher::match(
StringRef Query,
llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
- if (RemoveDotSlash)
- Query = llvm::sys::path::remove_leading_dotslash(Query);
for (const auto &Glob : reverse(Globs))
if (Glob->Pattern.match(Query))
Cb(Glob->Name, Glob->LineNo);
- for (const auto &Regex : reverse(RegExes))
- if (Regex->Rg.match(Query))
- Cb(Regex->Name, Regex->LineNo);
+}
+
+SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
+ : RemoveDotSlash(RemoveDotSlash) {
+ if (UseGlobs)
+ M.emplace<GlobMatcher>();
+ else
+ M.emplace<RegexMatcher>();
+}
+
+void SpecialCaseList::Matcher::match(
+ StringRef Query,
+ llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
+ if (RemoveDotSlash)
+ Query = llvm::sys::path::remove_leading_dotslash(Query);
+ return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
+}
+
+Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
+ return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
}
// TODO: Refactor this to return Expected<...>
@@ -139,10 +164,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB,
Expected<SpecialCaseList::Section *>
SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
unsigned LineNo, bool UseGlobs) {
- Sections.emplace_back(SectionStr, FileNo);
+ Sections.emplace_back(SectionStr, FileNo, UseGlobs);
auto &Section = Sections.back();
- if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo, UseGlobs)) {
+ if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
return createStringError(errc::invalid_argument,
"malformed section at line " + Twine(LineNo) +
": '" + SectionStr +
@@ -170,7 +195,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
bool RemoveDotSlash = Version > 2;
Section *CurrentSection;
- if (auto Err = addSection("*", FileIdx, 1).moveInto(CurrentSection)) {
+ if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
Error = toString(std::move(Err));
return false;
}
@@ -213,10 +238,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
auto [Pattern, Category] = Postfix.split("=");
- auto &Entry = CurrentSection->Entries[Prefix][Category];
- Entry.RemoveDotSlash =
- RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix);
- if (auto Err = Entry.insert(Pattern, LineNo, UseGlobs)) {
+ auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
+ Category, UseGlobs,
+ RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
+ if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
(Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
More information about the llvm-commits
mailing list