[llvm] 28b7e28 - [Support] Add `\g<ref>` backreferences in Regex::sub() (#67220)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 11 10:24:50 PDT 2023


Author: Igor Kudrin
Date: 2023-10-12T00:24:45+07:00
New Revision: 28b7e281d4eaea0d5d56b1a4cf7a550be746a007

URL: https://github.com/llvm/llvm-project/commit/28b7e281d4eaea0d5d56b1a4cf7a550be746a007
DIFF: https://github.com/llvm/llvm-project/commit/28b7e281d4eaea0d5d56b1a4cf7a550be746a007.diff

LOG: [Support] Add `\g<ref>` backreferences in Regex::sub() (#67220)

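The existing backreference format, a backslash followed by the group
number (e.g. `\1`), does not allow a literal digit to be placed directly
after the reference, because every following digit is parsed as part of
the group number: `\15` means group 15, not group 1 followed by `5`.
The new `\g<ref>` format solves this by delimiting the number
explicitly, so `\g<1>5` is group 1 followed by a literal `5`.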

Added: 
    

Modified: 
    llvm/include/llvm/Support/Regex.h
    llvm/lib/Support/Regex.cpp
    llvm/unittests/Support/RegexTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h
index ae4b9516f194e3a..bb7a8009b6bd0c3 100644
--- a/llvm/include/llvm/Support/Regex.h
+++ b/llvm/include/llvm/Support/Regex.h
@@ -85,8 +85,9 @@ namespace llvm {
                std::string *Error = nullptr) const;
 
     /// sub - Return the result of replacing the first match of the regex in
-    /// \p String with the \p Repl string. Backreferences like "\0" in the
-    /// replacement string are replaced with the appropriate match substring.
+    /// \p String with the \p Repl string. Backreferences like "\0" and "\g<1>"
+    /// in the replacement string are replaced with the appropriate match
+    /// substring.
     ///
     /// Note that the replacement string has backslash escaping performed on
     /// it. Invalid backreferences are ignored (replaced by empty strings).

diff  --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index dfbd373e4a98096..8fa71a749cc8e10 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -163,6 +163,25 @@ std::string Regex::sub(StringRef Repl, StringRef String,
 
     // FIXME: We should have a StringExtras function for mapping C99 escapes.
     switch (Repl[0]) {
+
+      // Backreference with the "\g<ref>" syntax
+    case 'g':
+      if (Repl.size() >= 4 && Repl[1] == '<') {
+        size_t End = Repl.find('>');
+        StringRef Ref = Repl.slice(2, End);
+        unsigned RefValue;
+        if (End != StringRef::npos && !Ref.getAsInteger(10, RefValue)) {
+          Repl = Repl.substr(End + 1);
+          if (RefValue < Matches.size())
+            Res += Matches[RefValue];
+          else if (Error && Error->empty())
+            *Error =
+                ("invalid backreference string 'g<" + Twine(Ref) + ">'").str();
+          break;
+        }
+      }
+      [[fallthrough]];
+
       // Treat all unrecognized characters as self-quoting.
     default:
       Res += Repl[0];

diff  --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp
index 78f37cdbd1ef89e..e3c721b466c6ccd 100644
--- a/llvm/unittests/Support/RegexTest.cpp
+++ b/llvm/unittests/Support/RegexTest.cpp
@@ -127,6 +127,34 @@ TEST_F(RegexTest, Substitution) {
 
   EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
   EXPECT_EQ(Error, "invalid backreference string '100'");
+
+  EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0a1234ber5",
+            Regex("a([0-9]+).*").sub("0\\g<0>5", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0A5", Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*")
+                       .sub("0\\g<10>5", "a123456789Aber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0g<-1>5",
+            Regex("a([0-9]+).*").sub("0\\g<-1>5", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("0g<3e>1",
+            Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error));
+  EXPECT_EQ("", Error);
+
+  EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error));
+  EXPECT_EQ(Error, "invalid backreference string 'g<100>'");
 }
 
 TEST_F(RegexTest, IsLiteralERE) {


        


More information about the llvm-commits mailing list