[llvm-branch-commits] [Support] Implement slash-agnostic path matching in GlobPattern (PR #202854)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jun 9 22:09:06 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Vitaly Buka (vitalybuka)
<details>
<summary>Changes</summary>
Add a SlashAgnostic option (off by default) to GlobPattern so that forward
slashes and backslashes are treated as the same path separator when matching.
When enabled:
- We conservatively shorten the plain prefix and suffix by treating path
separators as metacharacters, so neither the prefix nor the suffix can
contain a separator. This ensures that path separators are matched via
the slash-agnostic state machine rather than plain string comparison.
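- A bracket expression containing either separator is adjusted to match
both. The adjustment is applied before inversion, so an inverted bracket
that lists one separator rejects both.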
- Character comparisons in the state machine (matchChar) treat '/' and
'\' as equivalent.
Co-authored-by: Devon Loehr <DKLoehr@<!-- -->users.noreply.github.com>
Assisted-by: Gemini
---
Full diff: https://github.com/llvm/llvm-project/pull/202854.diff
3 Files Affected:
- (modified) llvm/include/llvm/Support/GlobPattern.h (+6-3)
- (modified) llvm/lib/Support/GlobPattern.cpp (+38-13)
- (modified) llvm/unittests/Support/GlobPatternTest.cpp (+62)
``````````diff
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..47aa7a1cc74c9 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
/// created from expanding braces otherwise disable
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -87,12 +88,14 @@ class GlobPattern {
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
+ bool SlashAgnostic = false;
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ LLVM_ABI static Expected<SubGlobPattern> create(
+ StringRef Pat, bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 1aaddbb8408a3..62cc6f16eaf33 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
static constexpr char PrefixMetacharacters[] = "?*[{\\";
static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
@@ -135,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
+ const char *Metas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ size_t PrefixSize = S.find_first_of(Metas);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -179,12 +183,19 @@ static StringRef maxPlainSubstring(StringRef S) {
}
Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
+ Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
+ const char *PrefixMetas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ const char *SuffixMetas =
+ SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
+ Pat.PrefixSize = S.find_first_of(PrefixMetas);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -192,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
+ size_t SuffixStart = S.find_last_of(SuffixMetas);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
@@ -205,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -215,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
// Parse brackets.
@@ -237,6 +248,12 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
if (!BVOrErr)
return BVOrErr.takeError();
BitVector &BV = *BVOrErr;
+ if (SlashAgnostic) {
+ if (BV['\\'] || BV['/']) {
+ BV.set('\\');
+ BV.set('/');
+ }
+ }
if (Invert)
BV.flip();
Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
@@ -251,8 +268,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
}
StringRef GlobPattern::longest_substr() const {
- return maxPlainSubstring(
- Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+ return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+ SlashAgnostic);
}
bool GlobPattern::match(StringRef S) const {
@@ -263,15 +280,23 @@ bool GlobPattern::match(StringRef S) const {
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, SlashAgnostic))
return true;
return false;
}
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+ if (PatC == QueryC)
+ return true;
+ return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+ (QueryC == '\\' || QueryC == '/');
+}
+
// Factor the pattern into segments split by '*'. The segment is matched
// sequentianlly by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool SlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -293,12 +318,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
continue;
}
} else if (*P == '\\') {
- if (*++P == *S) {
+ if (matchChar(*++P, *S, SlashAgnostic)) {
++P;
++S;
continue;
}
- } else if (*P == *S || *P == '?') {
+ } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
++P;
++S;
continue;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("", Pat->prefix());
EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab/cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
}
TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
Pat = GlobPattern::create("a*bcdef{g}*h");
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("bcdef", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc/de", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
}
TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+ auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat1);
+ EXPECT_TRUE(Pat1->match("foo/bar\\"));
+ EXPECT_TRUE(Pat1->match("foo/barb"));
+ EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+ auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_FALSE(Pat->match("foo/bar/"));
+ EXPECT_FALSE(Pat->match("foo/barb"));
+ EXPECT_TRUE(Pat->match("foo/bar1"));
+}
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/202854
More information about the llvm-branch-commits
mailing list