[llvm] 5b24b55 - [Support] Extract simple suffix from GlobPattern (#162118)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 6 14:42:56 PDT 2025


Author: Vitaly Buka
Date: 2025-10-06T14:42:52-07:00
New Revision: 5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b

URL: https://github.com/llvm/llvm-project/commit/5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b
DIFF: https://github.com/llvm/llvm-project/commit/5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b.diff

LOG: [Support] Extract simple suffix from GlobPattern (#162118)

Existing glob is optimized with prefix for "src:/dir1/dir2/*",
but I notices we often use patterns like "src:*dir1/dir2/file.h".

So suffix will help.

It will be hard to notice in most cases, but I use ignore list to bisect
some falures.
E.g. put 100k entries in the file, and build/test as needed.

On one of hard compilation units glob matching was 400s, after the
change 20s.

Still, there is higher level inefficiency in ignore list matching, which
I will
address in followup patches and remove 20s above.

Added: 
    

Modified: 
    llvm/include/llvm/Support/GlobPattern.h
    llvm/lib/Support/GlobPattern.cpp
    llvm/unittests/Support/GlobPatternTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 62ed4a0f23fd9..c1b44849b9794 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -65,13 +65,19 @@ class GlobPattern {
   bool isTrivialMatchAll() const {
     if (!Prefix.empty())
       return false;
+    if (!Suffix.empty())
+      return false;
     if (SubGlobs.size() != 1)
       return false;
     return SubGlobs[0].getPat() == "*";
   }
 
+  StringRef prefix() const { return Prefix; }
+  StringRef suffix() const { return Suffix; }
+
 private:
   StringRef Prefix;
+  StringRef Suffix;
 
   struct SubGlobPattern {
     /// \param Pat the pattern to match against

diff  --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 7004adf461a0c..0ecf47dc1d3d1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -143,6 +143,15 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
     return Pat;
   S = S.substr(PrefixSize);
 
+  // Just in case we stop on unmatched opening brackets.
+  size_t SuffixStart = S.find_last_of("?*[]{}\\");
+  assert(SuffixStart != std::string::npos);
+  if (S[SuffixStart] == '\\')
+    ++SuffixStart;
+  ++SuffixStart;
+  Pat.Suffix = S.substr(SuffixStart);
+  S = S.substr(0, SuffixStart);
+
   SmallVector<std::string, 1> SubPats;
   if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
     return std::move(Err);
@@ -193,6 +202,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
 bool GlobPattern::match(StringRef S) const {
   if (!S.consume_front(Prefix))
     return false;
+  if (!S.consume_back(Suffix))
+    return false;
   if (SubGlobs.empty() && S.empty())
     return true;
   for (auto &Glob : SubGlobs)

diff  --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index e4f1025b00956..58fd7678131c6 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -257,6 +257,78 @@ TEST_F(GlobPatternTest, NUL) {
   }
 }
 
+TEST_F(GlobPatternTest, PrefixSuffix) {
+  auto Pat = GlobPattern::create("");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("abcd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("abcd", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("*abcd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->prefix());
+  EXPECT_EQ("abcd", Pat->suffix());
+
+  Pat = GlobPattern::create("abcd*");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("abcd", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("ab*cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab?cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab[n]cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab{}cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab{cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+
+  Pat = GlobPattern::create("ab]cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab]cd", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("ab\\cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("d", Pat->suffix());
+
+  Pat = GlobPattern::create("ab\\\\cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("d", Pat->suffix());
+
+  Pat = GlobPattern::create("ab?cd?");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("ab", Pat->prefix());
+  EXPECT_EQ("", Pat->suffix());
+
+  Pat = GlobPattern::create("?ab?cd");
+  ASSERT_TRUE((bool)Pat);
+  EXPECT_EQ("", Pat->prefix());
+  EXPECT_EQ("cd", Pat->suffix());
+}
+
 TEST_F(GlobPatternTest, Pathological) {
   std::string P, S(40, 'a');
   StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};


        


More information about the llvm-commits mailing list