[libcxx-commits] [libcxx] 3abaf6c - [libc++] Implements multiline regex support.
Mark de Wever via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Nov 18 09:17:55 PST 2020
Author: Mark de Wever
Date: 2020-11-18T18:17:36+01:00
New Revision: 3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26
URL: https://github.com/llvm/llvm-project/commit/3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26
DIFF: https://github.com/llvm/llvm-project/commit/3abaf6cde7a92f38db2f5b3cb87e653f89f3bd26.diff
LOG: [libc++] Implements multiline regex support.
This resolves LWG2503.
Added:
libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp
Modified:
libcxx/include/regex
libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
libcxx/www/cxx1z_status.html
Removed:
################################################################################
diff --git a/libcxx/include/regex b/libcxx/include/regex
index f42f1ecd16a4..e4868af92f5f 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -32,7 +32,8 @@ enum syntax_option_type
extended = unspecified,
awk = unspecified,
grep = unspecified,
- egrep = unspecified
+ egrep = unspecified,
+ multiline = unspecified
};
constexpr syntax_option_type operator~(syntax_option_type f);
@@ -142,6 +143,7 @@ public:
static constexpr regex_constants::syntax_option_type awk = regex_constants::awk;
static constexpr regex_constants::syntax_option_type grep = regex_constants::grep;
static constexpr regex_constants::syntax_option_type egrep = regex_constants::egrep;
+ static constexpr regex_constants::syntax_option_type multiline = regex_constants::multiline;
// construct/copy/destroy:
basic_regex();
@@ -802,7 +804,9 @@ enum syntax_option_type
extended = 1 << 5,
awk = 1 << 6,
grep = 1 << 7,
- egrep = 1 << 8
+ egrep = 1 << 8,
+ // 1 << 9 may be used by ECMAScript
+ multiline = 1 << 10
};
inline _LIBCPP_CONSTEXPR
@@ -1982,24 +1986,33 @@ __word_boundary<_CharT, _Traits>::__exec(__state& __s) const
// __l_anchor
template <class _CharT>
-class __l_anchor
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
+bool __is_eol(_CharT __c) // returns true when __c is one of the line terminators '\r' or '\n'
+{
+ return __c == '\r' || __c == '\n'; // parameter uses a reserved (double-underscore) name so a user macro cannot collide with it
+}
+
+template <class _CharT>
+class __l_anchor_multiline // left-anchor node; replaces __l_anchor and adds an optional multiline mode
: public __owns_one_state<_CharT>
{
typedef __owns_one_state<_CharT> base;
+ bool __multiline; // when true, __exec also accepts a position directly preceded by '\r' or '\n'
+
public:
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
- __l_anchor(__node<_CharT>* __s)
- : base(__s) {}
+ __l_anchor_multiline(bool __multiline, __node<_CharT>* __s) // __multiline: enable line-aware matching; __s: node handed to the base class
+ : base(__s), __multiline(__multiline) {} // the member is initialized from the same-named parameter
virtual void __exec(__state&) const;
};
template <class _CharT>
void
-__l_anchor<_CharT>::__exec(__state& __s) const
+__l_anchor_multiline<_CharT>::__exec(__state& __s) const
{
if (__s.__at_first_ && __s.__current_ == __s.__first_ &&
!(__s.__flags_ & regex_constants::match_not_bol))
@@ -2007,6 +2020,13 @@ __l_anchor<_CharT>::__exec(__state& __s) const
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
+ else if (__multiline &&
+ !__s.__at_first_ &&
+ __is_eol(*_VSTD::prev(__s.__current_)))
+ {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ }
else
{
__s.__do_ = __state::__reject;
@@ -2017,24 +2037,26 @@ __l_anchor<_CharT>::__exec(__state& __s) const
// __r_anchor
template <class _CharT>
-class __r_anchor
+class __r_anchor_multiline // right-anchor node; replaces __r_anchor and adds an optional multiline mode
: public __owns_one_state<_CharT>
{
typedef __owns_one_state<_CharT> base;
+ bool __multiline; // when true, __exec also accepts a position directly followed by '\r' or '\n'
+
public:
typedef _VSTD::__state<_CharT> __state;
_LIBCPP_INLINE_VISIBILITY
- __r_anchor(__node<_CharT>* __s)
- : base(__s) {}
+ __r_anchor_multiline(bool __multiline, __node<_CharT>* __s) // __multiline: enable line-aware matching; __s: node handed to the base class
+ : base(__s), __multiline(__multiline) {} // the member is initialized from the same-named parameter
virtual void __exec(__state&) const;
};
template <class _CharT>
void
-__r_anchor<_CharT>::__exec(__state& __s) const
+__r_anchor_multiline<_CharT>::__exec(__state& __s) const
{
if (__s.__current_ == __s.__last_ &&
!(__s.__flags_ & regex_constants::match_not_eol))
@@ -2042,6 +2064,11 @@ __r_anchor<_CharT>::__exec(__state& __s) const
__s.__do_ = __state::__accept_but_not_consume;
__s.__node_ = this->first();
}
+ else if (__multiline && __s.__current_ != __s.__last_ && __is_eol(*__s.__current_)) // guard: with match_not_eol set this branch is reached at __last_, which must not be dereferenced
+ {
+ __s.__do_ = __state::__accept_but_not_consume; // the anchor matches without consuming the line terminator
+ __s.__node_ = this->first();
+ }
else
{
__s.__do_ = __state::__reject;
@@ -2541,6 +2568,7 @@ public:
static const regex_constants::syntax_option_type awk = regex_constants::awk;
static const regex_constants::syntax_option_type grep = regex_constants::grep;
static const regex_constants::syntax_option_type egrep = regex_constants::egrep;
+ static const regex_constants::syntax_option_type multiline = regex_constants::multiline;
// construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY
@@ -2707,6 +2735,12 @@ private:
_LIBCPP_INLINE_VISIBILITY
unsigned __loop_count() const {return __loop_count_;}
+ _LIBCPP_INLINE_VISIBILITY
+ bool __use_multiline() const // tells whether anchors created for this regex should use line-aware matching
+ {
+ return __get_grammar(__flags_) == ECMAScript && (__flags_ & multiline); // the multiline flag only takes effect when the grammar is ECMAScript
+ }
+
template <class _ForwardIterator>
void
__init(_ForwardIterator __first, _ForwardIterator __last);
@@ -4746,7 +4780,7 @@ template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_l_anchor()
{
- __end_->first() = new __l_anchor<_CharT>(__end_->first());
+ __end_->first() = new __l_anchor_multiline<_CharT>(__use_multiline(), __end_->first()); // new left-anchor node receives the previous successor; multiline mode is fixed at construction
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); // __end_ now refers to the node just linked in
}
@@ -4754,7 +4788,7 @@ template <class _CharT, class _Traits>
void
basic_regex<_CharT, _Traits>::__push_r_anchor()
{
- __end_->first() = new __r_anchor<_CharT>(__end_->first());
+ __end_->first() = new __r_anchor_multiline<_CharT>(__use_multiline(), __end_->first()); // new right-anchor node receives the previous successor; multiline mode is fixed at construction
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first()); // __end_ now refers to the node just linked in
}
diff --git a/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp b/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp
new file mode 100644
index 000000000000..18ac9da2b0cd
--- /dev/null
+++ b/libcxx/test/std/re/re.const/re.matchflag/match_multiline.pass.cpp
@@ -0,0 +1,272 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03
+
+// <regex>
+
+// multiline:
+// Specifies that ^ shall match the beginning of a line and $ shall match
+// the end of a line, if the ECMAScript engine is selected.
+
+#include <regex>
+#include <cassert>
+#include "test_macros.h"
+
+static void search(const char* pat, std::regex_constants::syntax_option_type f, // pat: pattern text; f: syntax flags used to build the regex
+ const char* target, bool expected) // target: text to search; expected: whether regex_search must succeed
+{
+ std::regex re(pat, f);
+ std::cmatch m;
+ assert(std::regex_search(target, m, re) == expected);
+
+ if(expected) {
+ assert(m.size() == 1); // only the whole-match entry is expected
+ assert(m.length(0) == 3);
+ assert(m.str(0) == "foo"); // a successful search must have matched exactly "foo"
+ }
+ else
+ {
+ assert(m.size() == 0); // a failed search must leave the match results empty
+ }
+}
+
+int main(int, char**) // runs every grammar, with and without multiline, over '\n' and '\r' line terminators
+{
+ using std::regex_constants::ECMAScript;
+ using std::regex_constants::basic;
+ using std::regex_constants::extended;
+ using std::regex_constants::awk;
+ using std::regex_constants::grep;
+ using std::regex_constants::egrep;
+ using std::regex_constants::multiline;
+
+ { // "^foo" at the very start of the input: matches for every grammar, multiline or not
+ const char* pat = "^foo";
+ const char* target = "foo";
+
+ search(pat, ECMAScript, target, true);
+ search(pat, basic, target, true);
+ search(pat, extended, target, true);
+ search(pat, awk, target, true);
+ search(pat, grep, target, true);
+ search(pat, egrep, target, true);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, true);
+ search(pat, extended | multiline, target, true);
+ search(pat, awk | multiline, target, true);
+ search(pat, grep | multiline, target, true);
+ search(pat, egrep | multiline, target, true);
+ }
+ { // "^foo" right after a leading '\n': only ECMAScript | multiline matches
+ const char* pat = "^foo";
+ const char* target = "\nfoo";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+ { // "^foo" on a second line after "bar\n": only ECMAScript | multiline matches
+ const char* pat = "^foo";
+ const char* target = "bar\nfoo";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+
+ { // "foo$" at the very end of the input: matches for every grammar, multiline or not
+ const char* pat = "foo$";
+ const char* target = "foo";
+
+ search(pat, ECMAScript, target, true);
+ search(pat, basic, target, true);
+ search(pat, extended, target, true);
+ search(pat, awk, target, true);
+ search(pat, grep, target, true);
+ search(pat, egrep, target, true);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, true);
+ search(pat, extended | multiline, target, true);
+ search(pat, awk | multiline, target, true);
+ search(pat, grep | multiline, target, true);
+ search(pat, egrep | multiline, target, true);
+ }
+ { // "foo$" right before a trailing '\n': only ECMAScript | multiline matches
+ const char* pat = "foo$";
+ const char* target = "foo\n";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+ { // "foo$" at the end of a first line followed by "\nbar": only ECMAScript | multiline matches
+ const char* pat = "foo$";
+ const char* target = "foo\nbar";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+
+
+ { // start of the '\r' group; this block repeats the plain "^foo"/"foo" baseline from above
+ const char* pat = "^foo";
+ const char* target = "foo";
+
+ search(pat, ECMAScript, target, true);
+ search(pat, basic, target, true);
+ search(pat, extended, target, true);
+ search(pat, awk, target, true);
+ search(pat, grep, target, true);
+ search(pat, egrep, target, true);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, true);
+ search(pat, extended | multiline, target, true);
+ search(pat, awk | multiline, target, true);
+ search(pat, grep | multiline, target, true);
+ search(pat, egrep | multiline, target, true);
+ }
+ { // "^foo" right after a leading '\r': only ECMAScript | multiline matches
+ const char* pat = "^foo";
+ const char* target = "\rfoo";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+ { // "^foo" on a second line after "bar\r": only ECMAScript | multiline matches
+ const char* pat = "^foo";
+ const char* target = "bar\rfoo";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+
+ { // this block repeats the plain "foo$"/"foo" baseline from above
+ const char* pat = "foo$";
+ const char* target = "foo";
+
+ search(pat, ECMAScript, target, true);
+ search(pat, basic, target, true);
+ search(pat, extended, target, true);
+ search(pat, awk, target, true);
+ search(pat, grep, target, true);
+ search(pat, egrep, target, true);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, true);
+ search(pat, extended | multiline, target, true);
+ search(pat, awk | multiline, target, true);
+ search(pat, grep | multiline, target, true);
+ search(pat, egrep | multiline, target, true);
+ }
+ { // "foo$" right before a trailing '\r': only ECMAScript | multiline matches
+ const char* pat = "foo$";
+ const char* target = "foo\r";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+ { // "foo$" at the end of a first line followed by "\rbar": only ECMAScript | multiline matches
+ const char* pat = "foo$";
+ const char* target = "foo\rbar";
+
+ search(pat, ECMAScript, target, false);
+ search(pat, basic, target, false);
+ search(pat, extended, target, false);
+ search(pat, awk, target, false);
+ search(pat, grep, target, false);
+ search(pat, egrep, target, false);
+
+ search(pat, ECMAScript | multiline, target, true);
+ search(pat, basic | multiline, target, false);
+ search(pat, extended | multiline, target, false);
+ search(pat, awk | multiline, target, false);
+ search(pat, grep | multiline, target, false);
+ search(pat, egrep | multiline, target, false);
+ }
+
+ return 0;
+}
diff --git a/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp b/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
index c34da53eed8c..ab718c58a7e9 100644
--- a/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
+++ b/libcxx/test/std/re/re.const/re.synopt/syntax_option_type.pass.cpp
@@ -23,7 +23,8 @@
// extended = unspecified,
// awk = unspecified,
// grep = unspecified,
-// egrep = unspecified
+// egrep = unspecified,
+// multiline = unspecified
// };
//
// }
@@ -48,6 +49,7 @@ int main(int, char**)
assert(std::regex_constants::awk != 0);
assert(std::regex_constants::grep != 0);
assert(std::regex_constants::egrep != 0);
+ assert(std::regex_constants::multiline != 0);
assert((std::regex_constants::icase & std::regex_constants::nosubs) == 0);
assert((std::regex_constants::icase & std::regex_constants::optimize) == 0);
@@ -58,6 +60,7 @@ int main(int, char**)
assert((std::regex_constants::icase & std::regex_constants::awk) == 0);
assert((std::regex_constants::icase & std::regex_constants::grep) == 0);
assert((std::regex_constants::icase & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::icase & std::regex_constants::multiline) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::optimize) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::collate) == 0);
@@ -67,6 +70,7 @@ int main(int, char**)
assert((std::regex_constants::nosubs & std::regex_constants::awk) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::grep) == 0);
assert((std::regex_constants::nosubs & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::nosubs & std::regex_constants::multiline) == 0);
assert((std::regex_constants::optimize & std::regex_constants::collate) == 0);
assert((std::regex_constants::optimize & std::regex_constants::ECMAScript) == 0);
@@ -75,6 +79,7 @@ int main(int, char**)
assert((std::regex_constants::optimize & std::regex_constants::awk) == 0);
assert((std::regex_constants::optimize & std::regex_constants::grep) == 0);
assert((std::regex_constants::optimize & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::optimize & std::regex_constants::multiline) == 0);
assert((std::regex_constants::collate & std::regex_constants::ECMAScript) == 0);
assert((std::regex_constants::collate & std::regex_constants::basic) == 0);
@@ -82,26 +87,34 @@ int main(int, char**)
assert((std::regex_constants::collate & std::regex_constants::awk) == 0);
assert((std::regex_constants::collate & std::regex_constants::grep) == 0);
assert((std::regex_constants::collate & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::collate & std::regex_constants::multiline) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::basic) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::extended) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::awk) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::grep) == 0);
assert((std::regex_constants::ECMAScript & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::ECMAScript & std::regex_constants::multiline) == 0);
assert((std::regex_constants::basic & std::regex_constants::extended) == 0);
assert((std::regex_constants::basic & std::regex_constants::awk) == 0);
assert((std::regex_constants::basic & std::regex_constants::grep) == 0);
assert((std::regex_constants::basic & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::basic & std::regex_constants::multiline) == 0);
assert((std::regex_constants::extended & std::regex_constants::awk) == 0);
assert((std::regex_constants::extended & std::regex_constants::grep) == 0);
assert((std::regex_constants::extended & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::extended & std::regex_constants::multiline) == 0);
assert((std::regex_constants::awk & std::regex_constants::grep) == 0);
assert((std::regex_constants::awk & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::awk & std::regex_constants::multiline) == 0);
assert((std::regex_constants::grep & std::regex_constants::egrep) == 0);
+ assert((std::regex_constants::grep & std::regex_constants::multiline) == 0);
+
+ assert((std::regex_constants::egrep & std::regex_constants::multiline) == 0);
assert((std::regex_constants::icase | std::regex_constants::nosubs) != 0);
assert((std::regex_constants::icase ^ std::regex_constants::nosubs) != 0);
diff --git a/libcxx/www/cxx1z_status.html b/libcxx/www/cxx1z_status.html
index b1db58e26ce3..e5130c0ae2f7 100644
--- a/libcxx/www/cxx1z_status.html
+++ b/libcxx/www/cxx1z_status.html
@@ -371,7 +371,7 @@ <h3>Library Working group Issues Status</h3>
<tr><td><a href="https://wg21.link/LWG2460">2460</a></td><td>LWG issue 2408 and value categories</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2468">2468</a></td><td>Self-move-assignment of library types</td><td>Issaquah</td><td></td></tr>
<tr><td><a href="https://wg21.link/LWG2475">2475</a></td><td>Allow overwriting of std::basic_string terminator with charT() to allow cleaner interoperation with legacy APIs</td><td>Issaquah</td><td>Complete</td></tr>
- <tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td></td></tr>
+ <tr><td><a href="https://wg21.link/LWG2503">2503</a></td><td>multiline option should be added to syntax_option_type</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2510">2510</a></td><td>Tag types should not be DefaultConstructible</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2514">2514</a></td><td>Type traits must not be final</td><td>Issaquah</td><td>Complete</td></tr>
<tr><td><a href="https://wg21.link/LWG2518">2518</a></td><td>[fund.ts.v2] Non-member swap for propagate_const should call member swap</td><td>Issaquah</td><td>Complete</td></tr>
@@ -503,7 +503,7 @@ <h3>Library Working group Issues Status</h3>
<!-- <tr><td></td><td></td><td></td><td></td></tr> -->
</table>
- <p>Last Updated: 3-Jul-2019</p>
+ <p>Last Updated: 17-Nov-2020</p>
</div>
</body>
</html>
More information about the libcxx-commits
mailing list