[libcxx-commits] [PATCH] D62453: Regex backreference [3/3] Validate backreferences in the constructor.
Mark de Wever via Phabricator via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Feb 20 12:19:54 PST 2020
Mordante updated this revision to Diff 245714.
Mordante added a comment.
Rebased as requested.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D62453/new/
https://reviews.llvm.org/D62453
Files:
libcxx/include/regex
libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
Index: libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
===================================================================
--- libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
+++ libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
@@ -18,11 +18,11 @@
#include <cassert>
#include "test_macros.h"
-static bool error_badbackref_thrown(const char *pat)
+static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
{
bool result = false;
try {
- std::regex re(pat);
+ std::regex re(pat, f);
} catch (const std::regex_error &ex) {
result = (ex.code() == std::regex_constants::error_backref);
}
@@ -31,9 +31,25 @@
int main(int, char**)
{
- assert(error_badbackref_thrown("\\1abc")); // no references
- assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
- assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
+// no references
+ assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1abd", std::regex::basic));
+ assert(error_badbackref_thrown("\\1abd", std::regex::extended));
+ assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1abd", std::regex::grep));
+ assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
+
+// only one reference
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
+ assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
+ assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
+
+
+ assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
// this should NOT throw, because we only should look at the '1'
// See https://bugs.llvm.org/show_bug.cgi?id=31387
@@ -42,5 +58,35 @@
std::regex re(pat1, pat1 + 7); // extra chars after the end.
}
+// reference before group
+ assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
+ assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
+
+// reference limit
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
+
+// https://bugs.llvm.org/show_bug.cgi?id=34297
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
+ assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
+ assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
+
return 0;
}
Index: libcxx/include/regex
===================================================================
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -4661,6 +4661,8 @@
unsigned __val = __traits_.value(c, 10);
if (__val >= 1 && __val <= 9)
{
+ if (__val > mark_count())
+ __throw_regex_error<regex_constants::error_backref>();
__push_back_ref(__val);
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62453.245714.patch
Type: text/x-patch
Size: 4798 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20200220/b6e681ad/attachment.bin>
More information about the libcxx-commits mailing list