[llvm-branch-commits] [Support] Implement slash-agnostic path matching in GlobPattern (PR #202854)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jun 9 22:09:06 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-support

Author: Vitaly Buka (vitalybuka)

<details>
<summary>Changes</summary>

Add a SlashAgnostic option to GlobPattern so that the two path separators,
forward slash ('/') and backslash ('\'), are treated as equivalent when matching.

When enabled:
- The plain prefix and suffix are conservatively shortened by treating path
  separators as metacharacters. This ensures that path separators are always
  matched by the slash-agnostic state machine rather than by plain string
  comparison.
- Brackets containing slashes are adjusted to match both separators.
- Character comparisons in the state machine (matchChar) treat '/' and
  '\' as equivalent.

Co-authored-by: Devon Loehr <DKLoehr@<!-- -->users.noreply.github.com>

Assisted-by: Gemini


---
Full diff: https://github.com/llvm/llvm-project/pull/202854.diff


3 Files Affected:

- (modified) llvm/include/llvm/Support/GlobPattern.h (+6-3) 
- (modified) llvm/lib/Support/GlobPattern.cpp (+38-13) 
- (modified) llvm/unittests/Support/GlobPatternTest.cpp (+62) 


``````````diff
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..47aa7a1cc74c9 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
   ///                       created from expanding braces otherwise disable
   ///                       brace expansion
   LLVM_ABI static Expected<GlobPattern>
-  create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+  create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+         bool SlashAgnostic = false);
   /// \returns \p true if \p S matches this glob pattern
   LLVM_ABI bool match(StringRef S) const;
 
@@ -87,12 +88,14 @@ class GlobPattern {
   StringRef Pattern;
   size_t PrefixSize = 0;
   size_t SuffixSize = 0;
+  bool SlashAgnostic = false;
 
   struct SubGlobPattern {
     /// \param Pat the pattern to match against
-    LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+    LLVM_ABI static Expected<SubGlobPattern> create(
+        StringRef Pat, bool SlashAgnostic = false);
     /// \returns \p true if \p S matches this glob pattern
-    LLVM_ABI bool match(StringRef S) const;
+    LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
     StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
 
     // Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 1aaddbb8408a3..62cc6f16eaf33 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
 
 static constexpr char PrefixMetacharacters[] = "?*[{\\";
 static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
 
 // Expands character ranges and returns a bitmap.
 // For example, "a-cf-hz" is expanded to "abcfghz".
@@ -135,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
   return std::move(SubPatterns);
 }
 
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
   StringRef Best;
   while (!S.empty()) {
-    size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
+    const char *Metas =
+        SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+    size_t PrefixSize = S.find_first_of(Metas);
     if (PrefixSize == std::string::npos)
       PrefixSize = S.size();
 
@@ -179,12 +183,19 @@ static StringRef maxPlainSubstring(StringRef S) {
 }
 
 Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns,
+                    bool SlashAgnostic) {
   GlobPattern Pat;
+  Pat.SlashAgnostic = SlashAgnostic;
   Pat.Pattern = S;
 
+  const char *PrefixMetas =
+      SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+  const char *SuffixMetas =
+      SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
   // Store the prefix that does not contain any metacharacter.
-  Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
+  Pat.PrefixSize = S.find_first_of(PrefixMetas);
   if (Pat.PrefixSize == std::string::npos) {
     Pat.PrefixSize = S.size();
     return Pat;
@@ -192,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
   S = S.substr(Pat.PrefixSize);
 
   // Just in case we stop on unmatched opening brackets.
-  size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
+  size_t SuffixStart = S.find_last_of(SuffixMetas);
   assert(SuffixStart != std::string::npos);
   if (S[SuffixStart] == '\\')
     ++SuffixStart;
@@ -205,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
   if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
     return std::move(Err);
   for (StringRef SubPat : SubPats) {
-    auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+    auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
     if (!SubGlobOrErr)
       return SubGlobOrErr.takeError();
     Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -215,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
 }
 
 Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
   SubGlobPattern Pat;
 
   // Parse brackets.
@@ -237,6 +248,12 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
       if (!BVOrErr)
         return BVOrErr.takeError();
       BitVector &BV = *BVOrErr;
+      if (SlashAgnostic) {
+        if (BV['\\'] || BV['/']) {
+          BV.set('\\');
+          BV.set('/');
+        }
+      }
       if (Invert)
         BV.flip();
       Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
@@ -251,8 +268,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
 }
 
 StringRef GlobPattern::longest_substr() const {
-  return maxPlainSubstring(
-      Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+  return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+                           SlashAgnostic);
 }
 
 bool GlobPattern::match(StringRef S) const {
@@ -263,15 +280,23 @@ bool GlobPattern::match(StringRef S) const {
   if (SubGlobs.empty() && S.empty())
     return true;
   for (auto &Glob : SubGlobs)
-    if (Glob.match(S))
+    if (Glob.match(S, SlashAgnostic))
       return true;
   return false;
 }
 
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+  if (PatC == QueryC)
+    return true;
+  return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+         (QueryC == '\\' || QueryC == '/');
+}
+
 // Factor the pattern into segments split by '*'. The segment is matched
 // sequentianlly by finding the first occurrence past the end of the previous
 // match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+                                        bool SlashAgnostic) const {
   const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
              *SavedS = S;
   const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -293,12 +318,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
         continue;
       }
     } else if (*P == '\\') {
-      if (*++P == *S) {
+      if (matchChar(*++P, *S, SlashAgnostic)) {
         ++P;
         ++S;
         continue;
       }
-    } else if (*P == *S || *P == '?') {
+    } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
       ++P;
       ++S;
       continue;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
   ASSERT_TRUE((bool)Pat);
   EXPECT_EQ("", Pat->prefix());
   EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("d", Pat->suffix());
+
+  Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/false);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab/cd", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/false);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("d", Pat->suffix());
 }
 
 TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
   Pat = GlobPattern::create("a*bcdef{g}*h");
   ASSERT_TRUE((bool)Pat);
   EXPECT_EQ("bcdef", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bc", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bc", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/false);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bc/de", Pat->longest_substr());
+
+  Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+                            /*SlashAgnostic=*/false);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("bc", Pat->longest_substr());
 }
 
 TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
   EXPECT_FALSE(Pat->match(S));
   EXPECT_TRUE(Pat->match(S + 'b'));
 }
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+  auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+                                  /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat1);
+  EXPECT_TRUE(Pat1->match("foo/bar\\"));
+  EXPECT_TRUE(Pat1->match("foo/barb"));
+  EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+  auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+                                 /*SlashAgnostic=*/true);
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_FALSE(Pat->match("foo/bar/"));
+  EXPECT_FALSE(Pat->match("foo/barb"));
+  EXPECT_TRUE(Pat->match("foo/bar1"));
+}
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/202854


More information about the llvm-branch-commits mailing list