[libcxx-commits] [libcxx] 72ce0c8 - [libc++][regex] Validate backreferences in the constructor.

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Thu Feb 20 15:21:35 PST 2020


Author: Mark de Wever
Date: 2020-02-20T18:16:21-05:00
New Revision: 72ce0c8073bc8dfa109d25a3c5f245ee0285568c

URL: https://github.com/llvm/llvm-project/commit/72ce0c8073bc8dfa109d25a3c5f245ee0285568c
DIFF: https://github.com/llvm/llvm-project/commit/72ce0c8073bc8dfa109d25a3c5f245ee0285568c.diff

LOG: [libc++][regex] Validate backreferences in the constructor.

This patch enables throwing exceptions for invalid backreferences
in the constructor when using the basic, extended, grep, or egrep grammar.

This fixes bug 34297.

Differential Revision: https://reviews.llvm.org/D62453

Added: 
    

Modified: 
    libcxx/include/regex
    libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp

Removed: 
    


################################################################################
diff  --git a/libcxx/include/regex b/libcxx/include/regex
index e349fa6846ae..68cfbb284347 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -4661,6 +4661,8 @@ basic_regex<_CharT, _Traits>::__test_back_ref(_CharT c)
     unsigned __val = __traits_.value(c, 10);
     if (__val >= 1 && __val <= 9)
     {
+        if (__val > mark_count())
+            __throw_regex_error<regex_constants::error_backref>();
         __push_back_ref(__val);
         return true;
     }

diff  --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
index 0a15b6453c8d..08a14442e4e3 100644
--- a/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
+++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_backref.pass.cpp
@@ -18,11 +18,11 @@
 #include <cassert>
 #include "test_macros.h"
 
-static bool error_badbackref_thrown(const char *pat)
+static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
 {
     bool result = false;
     try {
-        std::regex re(pat);
+        std::regex re(pat, f);
     } catch (const std::regex_error &ex) {
         result = (ex.code() == std::regex_constants::error_backref);
     }
@@ -31,9 +31,25 @@ static bool error_badbackref_thrown(const char *pat)
 
 int main(int, char**)
 {
-    assert(error_badbackref_thrown("\\1abc"));      // no references
-    assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
-    assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
+//  no references
+    assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("\\1abd", std::regex::basic));
+    assert(error_badbackref_thrown("\\1abd", std::regex::extended));
+    assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1abd", std::regex::grep));
+    assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
+
+//  only one reference
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
+    assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
+    assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
+
+
+    assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
 
 //  this should NOT throw, because we only should look at the '1'
 //  See https://bugs.llvm.org/show_bug.cgi?id=31387
@@ -42,5 +58,35 @@ int main(int, char**)
     std::regex re(pat1, pat1 + 7); // extra chars after the end.
     }
 
+//  reference before group
+    assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
+    assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
+
+//  reference limit
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
+    assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
+
+//  https://bugs.llvm.org/show_bug.cgi?id=34297
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
+    assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
+    assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
+
   return 0;
 }


        


More information about the libcxx-commits mailing list