[libcxx] r186954 - Bill Fisher: This patch fixes a bug where std::regex in ECMAScript mode was ignoring capture groups inside lookahead assertions.

Howard Hinnant hhinnant at apple.com
Tue Jul 23 09:18:04 PDT 2013


Author: hhinnant
Date: Tue Jul 23 11:18:04 2013
New Revision: 186954

URL: http://llvm.org/viewvc/llvm-project?rev=186954&view=rev
Log:
Bill Fisher: This patch fixes a bug where std::regex in ECMAScript mode was ignoring capture groups inside lookahead assertions. 

For example, matching /(?=(a))(a)/ against "a" should yield two captures: \1 = "a" and \2 = "a".

Added:
    libcxx/trunk/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp
Modified:
    libcxx/trunk/include/regex

Modified: libcxx/trunk/include/regex
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/regex?rev=186954&r1=186953&r2=186954&view=diff
==============================================================================
--- libcxx/trunk/include/regex (original)
+++ libcxx/trunk/include/regex Tue Jul 23 11:18:04 2013
@@ -2769,7 +2769,7 @@ private:
     void __push_end_marked_subexpression(unsigned);
     void __push_empty();
     void __push_word_boundary(bool);
-    void __push_lookahead(const basic_regex&, bool);
+    void __push_lookahead(const basic_regex&, bool, unsigned);
 
     template <class _Allocator>
         bool
@@ -2907,6 +2907,7 @@ class __lookahead
     typedef __owns_one_state<_CharT> base;
 
     basic_regex<_CharT, _Traits> __exp_;
+    unsigned __mexp_;
     bool __invert_;
 
     __lookahead(const __lookahead&);
@@ -2915,8 +2916,8 @@ public:
     typedef _VSTD::__state<_CharT> __state;
 
     _LIBCPP_INLINE_VISIBILITY
-    __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s)
-        : base(__s), __exp_(__exp), __invert_(__invert) {}
+    __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
+        : base(__s), __exp_(__exp), __invert_(__invert), __mexp_(__mexp) {}
 
     virtual void __exec(__state&) const;
 };
@@ -2935,6 +2936,9 @@ __lookahead<_CharT, _Traits>::__exec(__s
     {
         __s.__do_ = __state::__accept_but_not_consume;
         __s.__node_ = this->first();
+        for (unsigned __i = 1; __i < __m.size(); ++__i) {
+            __s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
+        }
     }
     else
     {
@@ -4168,7 +4172,9 @@ basic_regex<_CharT, _Traits>::__parse_as
                                 basic_regex __exp;
                                 __exp.__flags_ = __flags_;
                                 __temp = __exp.__parse(++__temp, __last);
-                                __push_lookahead(_VSTD::move(__exp), false);
+                                unsigned __mexp = __exp.__marked_count_;
+                                __push_lookahead(_VSTD::move(__exp), false, __marked_count_);
+                                __marked_count_ += __mexp;
 #ifndef _LIBCPP_NO_EXCEPTIONS
                                 if (__temp == __last || *__temp != ')')
                                     throw regex_error(regex_constants::error_paren);
@@ -4181,7 +4187,9 @@ basic_regex<_CharT, _Traits>::__parse_as
                                 basic_regex __exp;
                                 __exp.__flags_ = __flags_;
                                 __temp = __exp.__parse(++__temp, __last);
-                                __push_lookahead(_VSTD::move(__exp), true);
+                                unsigned __mexp = __exp.__marked_count_;
+                                __push_lookahead(_VSTD::move(__exp), true, __marked_count_);
+                                __marked_count_ += __mexp;
 #ifndef _LIBCPP_NO_EXCEPTIONS
                                 if (__temp == __last || *__temp != ')')
                                     throw regex_error(regex_constants::error_paren);
@@ -4759,10 +4767,11 @@ basic_regex<_CharT, _Traits>::__start_ma
 template <class _CharT, class _Traits>
 void
 basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp,
-                                               bool __invert)
+                                               bool __invert,
+                                               unsigned __mexp)
 {
     __end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert,
-                                                           __end_->first());
+                                                           __end_->first(), __mexp);
     __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 

Added: libcxx/trunk/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp
URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp?rev=186954&view=auto
==============================================================================
--- libcxx/trunk/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp (added)
+++ libcxx/trunk/test/re/re.alg/re.alg.match/lookahead_capture.pass.cpp Tue Jul 23 11:18:04 2013
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <regex>
+
+// template <class BidirectionalIterator, class Allocator, class charT, class traits>
+//     bool
+//     regex_match(BidirectionalIterator first, BidirectionalIterator last,
+//                  match_results<BidirectionalIterator, Allocator>& m,
+//                  const basic_regex<charT, traits>& e,
+//                  regex_constants::match_flag_type flags = regex_constants::match_default);
+
+// std::regex in ECMAScript mode should not ignore capture groups inside lookahead assertions. 
+// For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 = "a", \2 = "a"
+
+#include <regex>
+#include <cassert>
+
+#include "test_iterators.h"
+
+int main()
+{
+    {
+        std::regex re{"^(?=(.))a$"};
+        assert(re.mark_count() == 1);
+
+        std::string s{"a"};
+        std::smatch m;
+        assert(std::regex_match(s, m, re));
+        assert(m.size() == 2);
+        assert(m[0] == "a");
+        assert(m[1] == "a");
+    }
+
+    {
+        std::regex re{"^(a)(?=(.))(b)$"};
+        assert(re.mark_count() == 3);
+
+        std::string s{"ab"};
+        std::smatch m;
+        assert(std::regex_match(s, m, re));
+        assert(m.size() == 4);
+        assert(m[0] == "ab");
+        assert(m[1] == "a");
+        assert(m[2] == "b");
+        assert(m[3] == "b");
+    }
+
+    {
+        std::regex re{"^(.)(?=(.)(?=.(.)))(...)$"};
+        assert(re.mark_count() == 4);
+
+        std::string s{"abcd"};
+        std::smatch m;
+        assert(std::regex_match(s, m, re));
+        assert(m.size() == 5);
+        assert(m[0] == "abcd");
+        assert(m[1] == "a");
+        assert(m[2] == "b");
+        assert(m[3] == "d");
+        assert(m[4] == "bcd");
+    }
+
+    {
+        std::regex re{"^(a)(?!([^b]))(.c)$"};
+        assert(re.mark_count() == 3);
+
+        std::string s{"abc"};
+        std::smatch m;
+        assert(std::regex_match(s, m, re));
+        assert(m.size() == 4);
+        assert(m[0] == "abc");
+        assert(m[1] == "a");
+        assert(m[2] == "");
+        assert(m[3] == "bc");
+    }
+
+    {
+        std::regex re{"^(?!((b)))(?=(.))(?!(abc)).b$"};
+        assert(re.mark_count() == 4);
+
+        std::string s{"ab"};
+        std::smatch m;
+        assert(std::regex_match(s, m, re));
+        assert(m.size() == 5);
+        assert(m[0] == "ab");
+        assert(m[1] == "");
+        assert(m[2] == "");
+        assert(m[3] == "a");
+        assert(m[4] == "");
+    }
+}





More information about the cfe-commits mailing list