[libcxx-commits] [PATCH] D62453: Regex backreference [3/3] Validate backreferences in the constructor.

Mark de Wever via Phabricator via libcxx-commits libcxx-commits at lists.llvm.org
Thu Feb 20 12:19:54 PST 2020


Mordante updated this revision to Diff 245714.
Mordante added a comment.

Rebased as requested.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62453/new/

https://reviews.llvm.org/D62453

Files:
  libcxx/include/regex
  libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp


Index: libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
===================================================================
--- libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
+++ libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
@@ -18,11 +18,11 @@
 #include <cassert>
 #include "test_macros.h"
 
-static bool error_badbackref_thrown(const char *pat)
+static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
 {
     bool result = false;
     try {
-        std::regex re(pat);
+        std::regex re(pat, f);
     } catch (const std::regex_error &ex) {
         result = (ex.code() == std::regex_constants::error_backref);
     }
@@ -31,9 +31,25 @@
 
 int main(int, char**)
 {
-    assert(error_badbackref_thrown("\\1abc"));      // no references
-    assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
-    assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
+//  no references
+    assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("\\1abd", std::regex::basic));
+    assert(error_badbackref_thrown("\\1abd", std::regex::extended));
+    assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1abd", std::regex::grep));
+    assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
+
+//  only one reference
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
+
+
+    assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
 
 //  this should NOT throw, because we only should look at the '1'
 //  See https://bugs.llvm.org/show_bug.cgi?id=31387
@@ -42,5 +58,35 @@
     std::regex re(pat1, pat1 + 7); // extra chars after the end.
     }
 
+//  reference before group
+    assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
+
+//  reference limit
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
+
+//  https://bugs.llvm.org/show_bug.cgi?id=34297
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
+
   return 0;
 }
Index: libcxx/include/regex
===================================================================
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -4661,6 +4661,8 @@
     unsigned __val = __traits_.value(c, 10);
     if (__val >= 1 && __val <= 9)
     {
+        if (__val > mark_count())
+            __throw_regex_error<regex_constants::error_backref>();
         __push_back_ref(__val);
         return true;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62453.245714.patch
Type: text/x-patch
Size: 4798 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20200220/b6e681ad/attachment.bin>


More information about the libcxx-commits mailing list