[libcxx-commits] [libcxx] 3abaf6c - [libc++] Implements multiline regex support.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Wed Nov 18 09:17:55 PST 2020


Author: Mark de Wever
Date: 2020-11-18T18:17:36+01:00
New Revision: 3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26

URL: https://github.com/llvm/llvm-project/commit/3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26
DIFF: https://github.com/llvm/llvm-project/commit/3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26.diff

LOG: [libc++] Implements multiline regex support.

This resolves LWG2503.

Added: 
    libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp

Modified: 
    libcxx/include/regex
    libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
    libcxx/www/cxx1z_status.html

Removed: 
    


################################################################################
diff  --git a/libcxx/include/regex b/libcxx/include/regex
index f42f1ecd16a4..e4868af92f5f 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -32,7 +32,8 @@ enum syntax_option_type
     extended   = unspecified,
     awk        = unspecified,
     grep       = unspecified,
-    egrep      = unspecified
+    egrep      = unspecified,
+    multiline  = unspecified
 };
 
 constexpr syntax_option_type operator~(syntax_option_type f);
@@ -142,6 +143,7 @@ public:
     static constexpr regex_constants::syntax_option_type awk = regex_constants::awk;
     static constexpr regex_constants::syntax_option_type grep = regex_constants::grep;
     static constexpr regex_constants::syntax_option_type egrep = regex_constants::egrep;
+    static constexpr regex_constants::syntax_option_type multiline = regex_constants::multiline;
 
     // construct/copy/destroy:
     basic_regex();
@@ -802,7 +804,9 @@ enum syntax_option_type
     extended   = 1 << 5,
     awk        = 1 << 6,
     grep       = 1 << 7,
-    egrep      = 1 << 8
+    egrep      = 1 << 8,
+    // 1 << 9 may be used by ECMAScript
+    multiline  = 1 << 10
 };
 
 inline _LIBCPP_CONSTEXPR
@@ -1982,24 +1986,33 @@ __word_boundary<_CharT, _Traits>::__exec(__state& __s) const
 // __l_anchor
 
 template <class _CharT>
-class __l_anchor
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
+bool __is_eol(_CharT __c) // true iff __c is '\r' or '\n'; reserved name so user macros cannot clash
+{
+    return __c == '\r' || __c == '\n';
+}
+
+template <class _CharT>
+class __l_anchor_multiline // node whose __exec accepts at the first input position or, in multiline mode, after '\r' / '\n'
     : public __owns_one_state<_CharT>
 {
     typedef __owns_one_state<_CharT> base;
 
+    bool __multiline; // when set, __exec also accepts right after a '\r' or '\n'
+
 public:
     typedef _VSTD::__state<_CharT> __state;
 
     _LIBCPP_INLINE_VISIBILITY
-    __l_anchor(__node<_CharT>* __s)
-        : base(__s) {}
+    __l_anchor_multiline(bool __multiline, __node<_CharT>* __s) // __s is handed to the base class
+        : base(__s), __multiline(__multiline) {} // member initialised from the same-named parameter
 
     virtual void __exec(__state&) const;
 };
 
 template <class _CharT>
 void
-__l_anchor<_CharT>::__exec(__state& __s) const
+__l_anchor_multiline<_CharT>::__exec(__state& __s) const
 {
     if (__s.__at_first_ && __s.__current_ == __s.__first_ &&
         !(__s.__flags_ & regex_constants::match_not_bol))
@@ -2007,6 +2020,13 @@ __l_anchor<_CharT>::__exec(__state& __s) const
         __s.__do_ = __state::__accept_but_not_consume;
         __s.__node_ = this->first();
     }
+    else if (__multiline &&
+             !__s.__at_first_ &&
+             __is_eol(*_VSTD::prev(__s.__current_)))
+    {
+        __s.__do_ = __state::__accept_but_not_consume;
+        __s.__node_ = this->first();
+    }
     else
     {
         __s.__do_ = __state::__reject;
@@ -2017,24 +2037,26 @@ __l_anchor<_CharT>::__exec(__state& __s) const
 // __r_anchor
 
 template <class _CharT>
-class __r_anchor
+class __r_anchor_multiline // node whose __exec accepts at the end of the input or, in multiline mode, before '\r' / '\n'
     : public __owns_one_state<_CharT>
 {
     typedef __owns_one_state<_CharT> base;
 
+    bool __multiline; // when set, __exec also accepts if *__current_ is '\r' or '\n'. NOTE(review): that branch dereferences __current_ without comparing it to __last_; with match_not_eol it looks reachable at the end of the range - confirm
+
 public:
     typedef _VSTD::__state<_CharT> __state;
 
     _LIBCPP_INLINE_VISIBILITY
-    __r_anchor(__node<_CharT>* __s)
-        : base(__s) {}
+    __r_anchor_multiline(bool __multiline, __node<_CharT>* __s) // __s is handed to the base class
+        : base(__s), __multiline(__multiline) {} // member initialised from the same-named parameter
 
     virtual void __exec(__state&) const;
 };
 
 template <class _CharT>
 void
-__r_anchor<_CharT>::__exec(__state& __s) const
+__r_anchor_multiline<_CharT>::__exec(__state& __s) const
 {
     if (__s.__current_ == __s.__last_ &&
         !(__s.__flags_ & regex_constants::match_not_eol))
@@ -2042,6 +2064,11 @@ __r_anchor<_CharT>::__exec(__state& __s) const
         __s.__do_ = __state::__accept_but_not_consume;
         __s.__node_ = this->first();
     }
+    else if (__multiline && __is_eol(*__s.__current_))
+    {
+        __s.__do_ = __state::__accept_but_not_consume;
+        __s.__node_ = this->first();
+    }
     else
     {
         __s.__do_ = __state::__reject;
@@ -2541,6 +2568,7 @@ public:
     static const regex_constants::syntax_option_type awk = regex_constants::awk;
     static const regex_constants::syntax_option_type grep = regex_constants::grep;
     static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+    static const regex_constants::syntax_option_type multiline = regex_constants::multiline;
 
     // construct/copy/destroy:
     _LIBCPP_INLINE_VISIBILITY
@@ -2707,6 +2735,12 @@ private:
     _LIBCPP_INLINE_VISIBILITY
     unsigned __loop_count() const {return __loop_count_;}
 
+    _LIBCPP_INLINE_VISIBILITY
+    bool __use_multiline() const // true only when the grammar is ECMAScript and the multiline flag is set
+    {
+        return __get_grammar(__flags_) == ECMAScript && (__flags_ & multiline);
+    }
+
     template <class _ForwardIterator>
         void
         __init(_ForwardIterator __first, _ForwardIterator __last);
@@ -4746,7 +4780,7 @@ template <class _CharT, class _Traits>
 void
 basic_regex<_CharT, _Traits>::__push_l_anchor()
 {
-    __end_->first() = new __l_anchor<_CharT>(__end_->first());
+    __end_->first() = new __l_anchor_multiline<_CharT>(__use_multiline(), __end_->first());
     __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 
@@ -4754,7 +4788,7 @@ template <class _CharT, class _Traits>
 void
 basic_regex<_CharT, _Traits>::__push_r_anchor()
 {
-    __end_->first() = new __r_anchor<_CharT>(__end_->first());
+    __end_->first() = new __r_anchor_multiline<_CharT>(__use_multiline(), __end_->first());
     __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 

diff  --git a/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp b/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp
new file mode 100644
index 000000000000..18ac9da2b0cd
--- /dev/null
+++ b/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp
@@ -0,0 +1,272 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03
+
+// <regex>
+
+// multiline:
+//     Specifies that ^ shall match the beginning of a line and $ shall match
+//     the end of a line, if the ECMAScript engine is selected.
+
+#include <regex>
+#include <cassert>
+#include "test_macros.h"
+
+static void search(const char* pat, std::regex_constants::syntax_option_type f,
+                   const char* target, bool expected)
+{
+    // Compile `pat` under the syntax options `f`, then search `target` once.
+    const std::regex re(pat, f);
+    std::cmatch m;
+    assert(std::regex_search(target, m, re) == expected);
+
+    if (!expected) {
+        assert(m.size() == 0); // a failed search must leave the results empty
+        return;
+    }
+
+    assert(m.size() == 1); // whole match only, the patterns have no groups
+    assert(m.length(0) == 3);
+    assert(m.str(0) == "foo");
+}
+
+int main(int, char**)
+{
+    using std::regex_constants::ECMAScript;
+    using std::regex_constants::basic;
+    using std::regex_constants::extended;
+    using std::regex_constants::awk;
+    using std::regex_constants::grep;
+    using std::regex_constants::egrep;
+    using std::regex_constants::multiline;
+
+    { // '^' at the very start of the input: matches under every grammar, with or without multiline
+        const char* pat = "^foo";
+        const char* target = "foo";
+
+        search(pat, ECMAScript, target, true);
+        search(pat, basic, target, true);
+        search(pat, extended, target, true);
+        search(pat, awk, target, true);
+        search(pat, grep, target, true);
+        search(pat, egrep, target, true);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, true);
+        search(pat, extended | multiline, target, true);
+        search(pat, awk | multiline, target, true);
+        search(pat, grep | multiline, target, true);
+        search(pat, egrep | multiline, target, true);
+    }
+    { // '^' directly after a leading '\n': only ECMAScript | multiline matches
+        const char* pat = "^foo";
+        const char* target = "\nfoo";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+    { // '^' after "bar\n": only ECMAScript | multiline matches
+        const char* pat = "^foo";
+        const char* target = "bar\nfoo";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+
+    { // '$' at the very end of the input: matches under every grammar, with or without multiline
+        const char* pat = "foo$";
+        const char* target = "foo";
+
+        search(pat, ECMAScript, target, true);
+        search(pat, basic, target, true);
+        search(pat, extended, target, true);
+        search(pat, awk, target, true);
+        search(pat, grep, target, true);
+        search(pat, egrep, target, true);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, true);
+        search(pat, extended | multiline, target, true);
+        search(pat, awk | multiline, target, true);
+        search(pat, grep | multiline, target, true);
+        search(pat, egrep | multiline, target, true);
+    }
+    { // '$' directly before a trailing '\n': only ECMAScript | multiline matches
+        const char* pat = "foo$";
+        const char* target = "foo\n";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+    { // '$' before "\nbar": only ECMAScript | multiline matches
+        const char* pat = "foo$";
+        const char* target = "foo\nbar";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+
+
+    { // same pattern and target as the first '^' scenario above (no line terminator involved)
+        const char* pat = "^foo";
+        const char* target = "foo";
+
+        search(pat, ECMAScript, target, true);
+        search(pat, basic, target, true);
+        search(pat, extended, target, true);
+        search(pat, awk, target, true);
+        search(pat, grep, target, true);
+        search(pat, egrep, target, true);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, true);
+        search(pat, extended | multiline, target, true);
+        search(pat, awk | multiline, target, true);
+        search(pat, grep | multiline, target, true);
+        search(pat, egrep | multiline, target, true);
+    }
+    { // '^' directly after a leading '\r': only ECMAScript | multiline matches
+        const char* pat = "^foo";
+        const char* target = "\rfoo";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+    { // '^' after "bar\r": only ECMAScript | multiline matches
+        const char* pat = "^foo";
+        const char* target = "bar\rfoo";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+
+    { // same pattern and target as the first '$' scenario above (no line terminator involved)
+        const char* pat = "foo$";
+        const char* target = "foo";
+
+        search(pat, ECMAScript, target, true);
+        search(pat, basic, target, true);
+        search(pat, extended, target, true);
+        search(pat, awk, target, true);
+        search(pat, grep, target, true);
+        search(pat, egrep, target, true);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, true);
+        search(pat, extended | multiline, target, true);
+        search(pat, awk | multiline, target, true);
+        search(pat, grep | multiline, target, true);
+        search(pat, egrep | multiline, target, true);
+    }
+    { // '$' directly before a trailing '\r': only ECMAScript | multiline matches
+        const char* pat = "foo$";
+        const char* target = "foo\r";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+    { // '$' before "\rbar": only ECMAScript | multiline matches
+        const char* pat = "foo$";
+        const char* target = "foo\rbar";
+
+        search(pat, ECMAScript, target, false);
+        search(pat, basic, target, false);
+        search(pat, extended, target, false);
+        search(pat, awk, target, false);
+        search(pat, grep, target, false);
+        search(pat, egrep, target, false);
+
+        search(pat, ECMAScript | multiline, target, true);
+        search(pat, basic | multiline, target, false);
+        search(pat, extended | multiline, target, false);
+        search(pat, awk | multiline, target, false);
+        search(pat, grep | multiline, target, false);
+        search(pat, egrep | multiline, target, false);
+    }
+
+    return 0;
+}

diff  --git a/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp b/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
index c34da53eed8c..ab718c58a7e9 100644
--- a/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
+++ b/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
@@ -23,7 +23,8 @@
 //     extended   = unspecified,
 //     awk        = unspecified,
 //     grep       = unspecified,
-//     egrep      = unspecified
+//     egrep      = unspecified,
+//     multiline  = unspecified
 // };
 //
 // }
@@ -48,6 +49,7 @@ int main(int, char**)
     assert(std::regex_constants::awk != 0);
     assert(std::regex_constants::grep != 0);
     assert(std::regex_constants::egrep != 0);
+    assert(std::regex_constants::multiline != 0);
 
     assert((std::regex_constants::icase & std::regex_constants::nosubs) == 0);
     assert((std::regex_constants::icase & std::regex_constants::optimize) == 0);
@@ -58,6 +60,7 @@ int main(int, char**)
     assert((std::regex_constants::icase & std::regex_constants::awk) == 0);
     assert((std::regex_constants::icase & std::regex_constants::grep) == 0);
     assert((std::regex_constants::icase & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::icase & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::nosubs & std::regex_constants::optimize) == 0);
     assert((std::regex_constants::nosubs & std::regex_constants::collate) == 0);
@@ -67,6 +70,7 @@ int main(int, char**)
     assert((std::regex_constants::nosubs & std::regex_constants::awk) == 0);
     assert((std::regex_constants::nosubs & std::regex_constants::grep) == 0);
     assert((std::regex_constants::nosubs & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::nosubs & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::optimize & std::regex_constants::collate) == 0);
     assert((std::regex_constants::optimize & std::regex_constants::ECMAScript) == 0);
@@ -75,6 +79,7 @@ int main(int, char**)
     assert((std::regex_constants::optimize & std::regex_constants::awk) == 0);
     assert((std::regex_constants::optimize & std::regex_constants::grep) == 0);
     assert((std::regex_constants::optimize & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::optimize & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::collate & std::regex_constants::ECMAScript) == 0);
     assert((std::regex_constants::collate & std::regex_constants::basic) == 0);
@@ -82,26 +87,34 @@ int main(int, char**)
     assert((std::regex_constants::collate & std::regex_constants::awk) == 0);
     assert((std::regex_constants::collate & std::regex_constants::grep) == 0);
     assert((std::regex_constants::collate & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::collate & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::ECMAScript & std::regex_constants::basic) == 0);
     assert((std::regex_constants::ECMAScript & std::regex_constants::extended) == 0);
     assert((std::regex_constants::ECMAScript & std::regex_constants::awk) == 0);
     assert((std::regex_constants::ECMAScript & std::regex_constants::grep) == 0);
     assert((std::regex_constants::ECMAScript & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::ECMAScript & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::basic & std::regex_constants::extended) == 0);
     assert((std::regex_constants::basic & std::regex_constants::awk) == 0);
     assert((std::regex_constants::basic & std::regex_constants::grep) == 0);
     assert((std::regex_constants::basic & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::basic & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::extended & std::regex_constants::awk) == 0);
     assert((std::regex_constants::extended & std::regex_constants::grep) == 0);
     assert((std::regex_constants::extended & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::extended & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::awk & std::regex_constants::grep) == 0);
     assert((std::regex_constants::awk & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::awk & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::grep & std::regex_constants::egrep) == 0);
+    assert((std::regex_constants::grep & std::regex_constants::multiline) == 0);
+
+    assert((std::regex_constants::egrep & std::regex_constants::multiline) == 0);
 
     assert((std::regex_constants::icase | std::regex_constants::nosubs) != 0);
     assert((std::regex_constants::icase ^ std::regex_constants::nosubs) != 0);

diff  --git a/libcxx/www/cxx1z_status.html b/libcxx/www/cxx1z_status.html
index b1db58e26ce3..e5130c0ae2f7 100644
--- a/libcxx/www/cxx1z_status.html
+++ b/libcxx/www/cxx1z_status.html
@@ -371,7 +371,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2460">2460</a></td><td>LWG issue 2408 and value categories</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2468">2468</a></td><td>Self-move-assignment of library types</td><td>Issaquah</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2475">2475</a></td><td>Allow overwriting of std::basic_string terminator with charT() to allow cleaner interoperation with legacy APIs</td><td>Issaquah</td><td>Complete</td></tr>
-	<tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2510">2510</a></td><td>Tag types should not be DefaultConstructible</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2514">2514</a></td><td>Type traits must not be final</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2518">2518</a></td><td>[fund.ts.v2] Non-member swap for propagate_const should call member swap</td><td>Issaquah</td><td>Complete</td></tr>
@@ -503,7 +503,7 @@ <h3>Library Working group Issues Status</h3>
 <!-- 	<tr><td></td><td></td><td></td><td></td></tr> -->
   </table>
 
-  <p>Last Updated: 3-Jul-2019</p>
+  <p>Last Updated: 17-Nov-2020</p>
 </div>
 </body>
 </html>


        


More information about the libcxx-commits mailing list