[llvm] [Support] Add `\g<ref>` backreferences in Regex::sub() (PR #67220)
Igor Kudrin via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 17:51:47 PDT 2023
https://github.com/igorkudrin updated https://github.com/llvm/llvm-project/pull/67220
>From 37de6364efecb6011d09122a789f2ed18c0f5f6b Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin at accesssoftek.com>
Date: Fri, 22 Sep 2023 22:44:51 -0700
Subject: [PATCH] [Support] Add `\g<ref>` backreferences in Regex::sub()
The existing format of backreferences, `\<ref>`, does not allow digits
to be placed directly after the reference because they are included in
the reference number. The new format solves this problem by adding
explicit delimiters.
---
llvm/include/llvm/Support/Regex.h | 5 +++--
llvm/lib/Support/Regex.cpp | 19 +++++++++++++++++++
llvm/unittests/Support/RegexTest.cpp | 16 ++++++++++++++++
3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h
index ae4b9516f194e3a..bb7a8009b6bd0c3 100644
--- a/llvm/include/llvm/Support/Regex.h
+++ b/llvm/include/llvm/Support/Regex.h
@@ -85,8 +85,9 @@ namespace llvm {
std::string *Error = nullptr) const;
/// sub - Return the result of replacing the first match of the regex in
- /// \p String with the \p Repl string. Backreferences like "\0" in the
- /// replacement string are replaced with the appropriate match substring.
+ /// \p String with the \p Repl string. Backreferences like "\0" and "\g<1>"
+ /// in the replacement string are replaced with the appropriate match
+ /// substring.
///
/// Note that the replacement string has backslash escaping performed on
/// it. Invalid backreferences are ignored (replaced by empty strings).
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index dfbd373e4a98096..8fa71a749cc8e10 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -163,6 +163,25 @@ std::string Regex::sub(StringRef Repl, StringRef String,
// FIXME: We should have a StringExtras function for mapping C99 escapes.
switch (Repl[0]) {
+
+ // Backreference with the "\g<ref>" syntax
+ case 'g':
+ if (Repl.size() >= 4 && Repl[1] == '<') {
+ size_t End = Repl.find('>');
+ StringRef Ref = Repl.slice(2, End);
+ unsigned RefValue;
+ if (End != StringRef::npos && !Ref.getAsInteger(10, RefValue)) {
+ Repl = Repl.substr(End + 1);
+ if (RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error =
+ ("invalid backreference string 'g<" + Twine(Ref) + ">'").str();
+ break;
+ }
+ }
+ [[fallthrough]];
+
// Treat all unrecognized characters as self-quoting.
default:
Res += Repl[0];
diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp
index 78f37cdbd1ef89e..aecd2ff3133a3d7 100644
--- a/llvm/unittests/Support/RegexTest.cpp
+++ b/llvm/unittests/Support/RegexTest.cpp
@@ -127,6 +127,22 @@ TEST_F(RegexTest, Substitution) {
EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
EXPECT_EQ(Error, "invalid backreference string '100'");
+
+ EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0g<3e>1",
+ Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "invalid backreference string 'g<100>'");
}
TEST_F(RegexTest, IsLiteralERE) {
More information about the llvm-commits
mailing list