[llvm] 6fdef0b - [NFC][GlobPattern] Add GlobPattern::longest_substr() (#164512)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 22 20:46:44 PDT 2025
Author: Vitaly Buka
Date: 2025-10-23T03:46:40Z
New Revision: 6fdef0bbe235303dd98be61275acfa79fab34770
URL: https://github.com/llvm/llvm-project/commit/6fdef0bbe235303dd98be61275acfa79fab34770
DIFF: https://github.com/llvm/llvm-project/commit/6fdef0bbe235303dd98be61275acfa79fab34770.diff
LOG: [NFC][GlobPattern] Add GlobPattern::longest_substr() (#164512)
Finds the longest (almost) plain substring in the pattern.
Implementation is conservative to avoid false positives.
The result is not used to optimize
`GlobPattern::match()` so it's calculated on
request.
For
* https://github.com/llvm/llvm-project/pull/164545
---------
Co-authored-by: Luke Lau <luke at igalia.com>
Added:
Modified:
llvm/include/llvm/Support/GlobPattern.h
llvm/lib/Support/GlobPattern.cpp
llvm/unittests/Support/GlobPatternTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 6ebf64565559b..8cae6a38d8326 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -79,6 +79,9 @@ class GlobPattern {
StringRef prefix() const { return Pattern.take_front(PrefixSize); }
// Returns plain suffix of the pattern.
StringRef suffix() const { return Pattern.take_back(SuffixSize); }
+ // Returns the longest plain substring of the pattern between prefix and
+ // suffix.
+ StringRef longest_substr() const;
private:
StringRef Pattern;
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index f56a8fcf4bf9d..2715229c65be1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,6 +132,49 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
+static StringRef maxPlainSubstring(StringRef S) { // Longest run of S without any of ? * [ { or backslash; the first such run wins ties.
+ StringRef Best;
+ while (!S.empty()) {
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ if (PrefixSize == std::string::npos)
+ PrefixSize = S.size();
+ // Keep this run only if it is strictly longer than the best one so far.
+ if (Best.size() < PrefixSize)
+ Best = S.take_front(PrefixSize);
+ // Continue from the metacharacter that ended this run.
+ S = S.drop_front(PrefixSize);
+ // S cannot be empty here: the first and last characters of the input
+ // must be glob special characters, otherwise they would have been part
+ // of the prefix or the suffix. NOTE(review): this is only asserted; confirm
+ // the suffix split cannot leave a trailing literal such as ']' or '}'.
+ assert(!S.empty());
+ // Skip the whole construct introduced by the metacharacter at S.front().
+ switch (S.front()) {
+ case '\\':
+ S = S.drop_front(2); // The backslash and the character it escapes.
+ break;
+ case '[': {
+ // Drop '[' and the character after it, which may be a literal ']'.
+ S = S.drop_front(2);
+ size_t EndBracket = S.find_first_of("]");
+ // A missing ']' should not be possible: SubGlobPattern::create should
+ // have rejected such a pattern before we get here.
+ assert(EndBracket != std::string::npos);
+ S = S.drop_front(EndBracket + 1); // Rest of the bracket expression, including ']'.
+ break;
+ }
+ case '{':
+ // TODO: implement scanning past '{'.
+ // Until then, stop here and fall back to the best run found so far.
+ return Best;
+ default:
+ S = S.drop_front(1); // '?' or '*': a single character.
+ }
+ }
+
+ return Best;
+}
+
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
@@ -202,6 +245,11 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
+StringRef GlobPattern::longest_substr() const { // Recomputed from Pattern on every call.
+ return maxPlainSubstring(
+ Pattern.drop_front(PrefixSize).drop_back(SuffixSize)); // Only the part between prefix() and suffix() is scanned.
+}
+
bool GlobPattern::match(StringRef S) const {
if (!S.consume_front(prefix()))
return false;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 58fd7678131c6..872a21e948d7a 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
EXPECT_EQ("cd", Pat->suffix());
}
+TEST_F(GlobPatternTest, Substr) { // Expected longest_substr() results for a range of patterns.
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+ // No metacharacters at all: nothing is reported.
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+ // A single '*': the literal text on either side of it is not reported.
+ Pat = GlobPattern::create("a*bcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+ // Leading '*' followed only by literal text: still empty.
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+ // Trailing '*' preceded only by literal text: still empty.
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+ // Literal text enclosed by two '*' is reported.
+ Pat = GlobPattern::create("a*bc*d");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+ // Of several enclosed runs, the longest one is returned.
+ Pat = GlobPattern::create("a*bc*def*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("def", Pat->longest_substr());
+ // Same, with the longest run coming first.
+ Pat = GlobPattern::create("a*bcd*ef*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+ // Runs of equal length: the earlier one is returned.
+ Pat = GlobPattern::create("a*bcd*efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+ // A bracket expression ends the run; the "g" after it is shorter than "bcd".
+ Pat = GlobPattern::create("a*bcd[ef]g*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+ // The run following a bracket expression can be the longest one.
+ Pat = GlobPattern::create("a*bc[d]efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("efg", Pat->longest_substr());
+ // A ']' directly after '[' does not close the bracket expression.
+ Pat = GlobPattern::create("a*bc[]]efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("efg", Pat->longest_substr());
+ // A backslash escape ends the run; the escaped character is skipped.
+ Pat = GlobPattern::create("a*bcde\\fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+ // An escaped '[' does not open a bracket expression.
+ Pat = GlobPattern::create("a*bcde\\[fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+ // '?' ends the run as well.
+ Pat = GlobPattern::create("a*bcde?fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+ // The run before a '{' is still reported.
+ Pat = GlobPattern::create("a*bcdef{g}*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcdef", Pat->longest_substr());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
More information about the llvm-commits
mailing list