[llvm-commits] [llvm] r96503 - in /llvm/trunk: include/llvm/Support/Regex.h lib/Support/Regex.cpp unittests/Support/RegexTest.cpp
Daniel Dunbar
daniel at zuster.org
Wed Feb 17 12:08:43 PST 2010
Author: ddunbar
Date: Wed Feb 17 14:08:42 2010
New Revision: 96503
URL: http://llvm.org/viewvc/llvm-project?rev=96503&view=rev
Log:
Add Regex::sub, for doing regular expression substitution with backreferences.
Modified:
llvm/trunk/include/llvm/Support/Regex.h
llvm/trunk/lib/Support/Regex.cpp
llvm/trunk/unittests/Support/RegexTest.cpp
Modified: llvm/trunk/include/llvm/Support/Regex.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Regex.h?rev=96503&r1=96502&r2=96503&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/Regex.h (original)
+++ llvm/trunk/include/llvm/Support/Regex.h Wed Feb 17 14:08:42 2010
@@ -56,6 +56,19 @@
///
/// This returns true on a successful match.
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+
+ /// sub - Return the result of replacing the first match of the regex in
+ /// \arg String with the \arg Repl string. Backreferences like "\0" in the
+ /// replacement string are replaced with the appropriate match substring.
+ ///
+ /// Note that the replacement string has backslash escaping performed on
+ /// it. Invalid backreferences are ignored (replaced by empty strings).
+ ///
+ /// \param Error If non-null, any errors in the substitution (invalid
+ /// backreferences, trailing backslashes) will be recorded as a non-empty
+ /// string.
+ std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
+
private:
struct llvm_regex *preg;
int error;
Modified: llvm/trunk/lib/Support/Regex.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Regex.cpp?rev=96503&r1=96502&r2=96503&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Regex.cpp (original)
+++ llvm/trunk/lib/Support/Regex.cpp Wed Feb 17 14:08:42 2010
@@ -90,3 +90,79 @@
return true;
}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for terminimation and trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
Modified: llvm/trunk/unittests/Support/RegexTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/RegexTest.cpp?rev=96503&r1=96502&r2=96503&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/RegexTest.cpp (original)
+++ llvm/trunk/unittests/Support/RegexTest.cpp Wed Feb 17 14:08:42 2010
@@ -62,4 +62,33 @@
EXPECT_TRUE(r5.match(String));
}
+TEST_F(RegexTest, Substitution) {
+ std::string Error;
+
+ EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
+
+ // Standard Escapes
+ EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
+ EXPECT_EQ(Error, "replacement string contained trailing backslash");
+
+ // Backreferences
+ EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "invalid backreference string '100'");
+}
+
}
More information about the llvm-commits
mailing list