[llvm] 5b24b55 - [Support] Extract simple suffix from GlobPattern (#162118)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 14:42:56 PDT 2025
Author: Vitaly Buka
Date: 2025-10-06T14:42:52-07:00
New Revision: 5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b
URL: https://github.com/llvm/llvm-project/commit/5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b
DIFF: https://github.com/llvm/llvm-project/commit/5b24b558b3b780fb0f3d4587c7f4a3b4fddeec8b.diff
LOG: [Support] Extract simple suffix from GlobPattern (#162118)
The existing glob is optimized with a prefix for "src:/dir1/dir2/*",
but I noticed we often use patterns like "src:*dir1/dir2/file.h".
So extracting a suffix will help.
The difference will be hard to notice in most cases, but I use an ignore list
to bisect some failures.
E.g. put 100k entries in the file, and build/test as needed.
On one of the hard compilation units, glob matching took 400s; after the
change it takes 20s.
Still, there is a higher-level inefficiency in ignore list matching, which
I will
address in follow-up patches to remove the remaining 20s mentioned above.
Added:
Modified:
llvm/include/llvm/Support/GlobPattern.h
llvm/lib/Support/GlobPattern.cpp
llvm/unittests/Support/GlobPatternTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 62ed4a0f23fd9..c1b44849b9794 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -65,13 +65,19 @@ class GlobPattern {
bool isTrivialMatchAll() const {
if (!Prefix.empty())
return false;
+ if (!Suffix.empty())
+ return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}
+ StringRef prefix() const { return Prefix; }
+ StringRef suffix() const { return Suffix; }
+
private:
StringRef Prefix;
+ StringRef Suffix;
struct SubGlobPattern {
/// \param Pat the pattern to match against
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 7004adf461a0c..0ecf47dc1d3d1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -143,6 +143,15 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
return Pat;
S = S.substr(PrefixSize);
+ // Just in case we stop on unmatched opening brackets.
+ size_t SuffixStart = S.find_last_of("?*[]{}\\");
+ assert(SuffixStart != std::string::npos);
+ if (S[SuffixStart] == '\\')
+ ++SuffixStart;
+ ++SuffixStart;
+ Pat.Suffix = S.substr(SuffixStart);
+ S = S.substr(0, SuffixStart);
+
SmallVector<std::string, 1> SubPats;
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
@@ -193,6 +202,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
bool GlobPattern::match(StringRef S) const {
if (!S.consume_front(Prefix))
return false;
+ if (!S.consume_back(Suffix))
+ return false;
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index e4f1025b00956..58fd7678131c6 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -257,6 +257,78 @@ TEST_F(GlobPatternTest, NUL) {
}
}
+TEST_F(GlobPatternTest, PrefixSuffix) {
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("abcd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("abcd", Pat->suffix());
+
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("abcd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab*cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab?cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab[n]cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab{}cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab{cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab]cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab]cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\\\cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab?cd?");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("?ab?cd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
More information about the llvm-commits
mailing list