[llvm] [GlobPattern] Add GlobPattern::longest_substr(). (PR #164512)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 15:56:28 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
Author: Vitaly Buka (vitalybuka)
<details>
<summary>Changes</summary>
Finds the longest (almost) plain substring in the pattern.
The implementation is conservative to avoid false positives.
The result is not used to optimize
`GlobPattern::match()`, so it is calculated only on
request.
---
Full diff: https://github.com/llvm/llvm-project/pull/164512.diff
3 Files Affected:
- (modified) llvm/include/llvm/Support/GlobPattern.h (+15-7)
- (modified) llvm/lib/Support/GlobPattern.cpp (+49-8)
- (modified) llvm/unittests/Support/GlobPatternTest.cpp (+58)
``````````diff
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index c1b44849b9794..4824f3fa01e5b 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -63,22 +63,30 @@ class GlobPattern {
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
bool isTrivialMatchAll() const {
- if (!Prefix.empty())
+ if (PrefixSize)
return false;
- if (!Suffix.empty())
+ if (SuffixSize)
return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}
- StringRef prefix() const { return Prefix; }
- StringRef suffix() const { return Suffix; }
+ // The following functions serve as shortcuts for some matching. They are
+ // conservative to simplify implementations.
-private:
- StringRef Prefix;
- StringRef Suffix;
+ // Returns plain prefix of the pattern.
+ StringRef prefix() const { return Pattern.take_front(PrefixSize); }
+ // Returns plain suffix of the pattern.
+ StringRef suffix() const { return Pattern.take_back(SuffixSize); }
+ // Returns the longest plain substring of the pattern between the prefix and
+ // the suffix.
+ StringRef longest_substr() const;
+private:
+ StringRef Pattern;
+ size_t PrefixSize = 0;
+ size_t SuffixSize = 0;
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 0ecf47dc1d3d1..dfc1508ce63af 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,24 +132,60 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
+static StringRef maxPlainSubstring(StringRef S) {
+ StringRef R;
+ while (!S.empty()) {
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ if (PrefixSize == std::string::npos)
+ PrefixSize = S.size();
+
+ if (R.size() < PrefixSize)
+ R = S.take_front(PrefixSize);
+ S = S.drop_front(PrefixSize);
+
+ switch (S.front()) {
+ case '\\':
+ S = S.drop_front(2);
+ break;
+ case '[': {
+ size_t EndBracket = S.find_first_of("]");
+ if (EndBracket == std::string::npos)
+ return R; // Incorrect, but let SubGlobPattern::create handle it.
+ S = S.drop_front(EndBracket + 1);
+ break;
+ }
+ case '{':
+ // TODO: implement.
+ return {};
+ default:
+ S = S.drop_front(1);
+ }
+ }
+
+ return R;
+}
+
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
+ Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\");
- Pat.Prefix = S.substr(0, PrefixSize);
- if (PrefixSize == std::string::npos)
+ Pat.PrefixSize = S.find_first_of("?*[{\\");
+ if (Pat.PrefixSize == std::string::npos) {
+ Pat.PrefixSize = S.size();
return Pat;
- S = S.substr(PrefixSize);
+ }
+ S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
size_t SuffixStart = S.find_last_of("?*[]{}\\");
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
- ++SuffixStart;
- Pat.Suffix = S.substr(SuffixStart);
+ if (SuffixStart < S.size())
+ ++SuffixStart;
+ Pat.SuffixSize = S.size() - SuffixStart;
S = S.substr(0, SuffixStart);
SmallVector<std::string, 1> SubPats;
@@ -199,10 +235,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
+StringRef GlobPattern::longest_substr() const {
+ return maxPlainSubstring(
+ Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
bool GlobPattern::match(StringRef S) const {
- if (!S.consume_front(Prefix))
+ if (!S.consume_front(prefix()))
return false;
- if (!S.consume_back(Suffix))
+ if (!S.consume_back(suffix()))
return false;
if (SubGlobs.empty() && S.empty())
return true;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 58fd7678131c6..a0e0d1415f383 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -329,6 +329,64 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
EXPECT_EQ("cd", Pat->suffix());
}
+TEST_F(GlobPatternTest, Substr) {
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*d");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*def*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("def", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*ef*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd[ef]g*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\[fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde?fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcdef{g}*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
``````````
</details>
https://github.com/llvm/llvm-project/pull/164512
More information about the llvm-commits
mailing list