[llvm] [Support] Add `\{<ref>}` backreferences in Regex::sub(). (PR #67220)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 23:06:44 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-support
<details>
<summary>Changes</summary>
The existing backreference format, `\<ref>`, does not allow a digit to be placed directly after the reference, because any digits that follow are parsed as part of the reference number. The new format, `\{<ref>}`, solves this problem by delimiting the reference number explicitly with curly braces.
---
Full diff: https://github.com/llvm/llvm-project/pull/67220.diff
3 Files Affected:
- (modified) llvm/include/llvm/Support/Regex.h (+3-2)
- (modified) llvm/lib/Support/Regex.cpp (+19)
- (modified) llvm/unittests/Support/RegexTest.cpp (+15)
``````````diff
diff --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h
index ae4b9516f194e3a..1df768c53dd3766 100644
--- a/llvm/include/llvm/Support/Regex.h
+++ b/llvm/include/llvm/Support/Regex.h
@@ -85,8 +85,9 @@ namespace llvm {
std::string *Error = nullptr) const;
/// sub - Return the result of replacing the first match of the regex in
- /// \p String with the \p Repl string. Backreferences like "\0" in the
- /// replacement string are replaced with the appropriate match substring.
+ /// \p String with the \p Repl string. Backreferences like "\0" and "\{1}"
+ /// in the replacement string are replaced with the appropriate match
+ /// substring.
///
/// Note that the replacement string has backslash escaping performed on
/// it. Invalid backreferences are ignored (replaced by empty strings).
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index dfbd373e4a98096..79ae13bcf57ad86 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -179,6 +179,25 @@ std::string Regex::sub(StringRef Repl, StringRef String,
Repl = Repl.substr(1);
break;
+ // Backreference in curly braces
+ case '{': {
+ size_t End = Repl.find('}');
+ StringRef Ref = Repl.slice(1, End);
+ unsigned RefValue;
+ if (End == StringRef::npos || Ref.getAsInteger(10, RefValue)) {
+ // Fallback to the pre-existing behavior
+ Res += '{';
+ Repl = Repl.substr(1);
+ break;
+ }
+ Repl = Repl.substr(End + 1);
+ if (RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = ("invalid backreference string '{" + Twine(Ref) + "}'").str();
+ break;
+ }
+
// Decimal escapes are backreferences.
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': {
diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp
index 78f37cdbd1ef89e..8dc686652b9a3d4 100644
--- a/llvm/unittests/Support/RegexTest.cpp
+++ b/llvm/unittests/Support/RegexTest.cpp
@@ -127,6 +127,21 @@ TEST_F(RegexTest, Substitution) {
EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
EXPECT_EQ(Error, "invalid backreference string '100'");
+
+ EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\{1}5", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0{15", Regex("a([0-9]+).*").sub("0\\{15", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0{}15", Regex("a([0-9]+).*").sub("0\\{}15", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("0{3e}1", Regex("a([0-9]+).*").sub("0\\{3e}1", "a1234ber", &Error));
+ EXPECT_EQ("", Error);
+
+ EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\{100}b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "invalid backreference string '{100}'");
}
TEST_F(RegexTest, IsLiteralERE) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/67220
More information about the llvm-commits mailing list