[libcxx-commits] [libcxx] [libc++] <regex>: Make unmatched backrefs always succeed in ECMAScript mode (PR #154442)
via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Aug 19 16:46:49 PDT 2025
https://github.com/SainoNamkho updated https://github.com/llvm/llvm-project/pull/154442
>From d4f97686e86e329ec8f0ec0870b4916dae98782b Mon Sep 17 00:00:00 2001
From: SainoNamkho <23036788+SainoNamkho at users.noreply.github.com>
Date: Wed, 20 Aug 2025 04:07:47 +0800
Subject: [PATCH] [libc++] <regex>: Make unmatched backrefs always
succeed in ECMAScript mode.
Fix #154408
---
libcxx/include/regex | 41 +++++++++++++------
.../std/re/re.alg/re.alg.search/ecma.pass.cpp | 30 ++++++++++++++
2 files changed, 58 insertions(+), 13 deletions(-)
diff --git a/libcxx/include/regex b/libcxx/include/regex
index 9bbc3a69021b9..ba69f5681a5d4 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1668,7 +1668,7 @@ void __end_marked_subexpression<_CharT>::__exec(__state& __s) const {
// __back_ref
-template <class _CharT>
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
class __back_ref : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1682,8 +1682,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT>
-void __back_ref<_CharT>::__exec(__state& __s) const {
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
+void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
if (__mexp_ > __s.__sub_matches_.size())
std::__throw_regex_error<regex_constants::error_backref>();
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
@@ -1697,6 +1697,9 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
}
+ } else if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();;
} else {
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -1705,7 +1708,7 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
// __back_ref_icase
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
class __back_ref_icase : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1721,8 +1724,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT, class _Traits>
-void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
if (__sm.matched) {
ptrdiff_t __len = __sm.second - __sm.first;
@@ -1739,6 +1742,11 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
__s.__node_ = nullptr;
}
} else {
+ if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ return;
+ }
__not_equal:
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -1747,7 +1755,7 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
// __back_ref_collate
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
class __back_ref_collate : public __owns_one_state<_CharT> {
typedef __owns_one_state<_CharT> base;
@@ -1763,8 +1771,8 @@ public:
_LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
};
-template <class _CharT, class _Traits>
-void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
if (__sm.matched) {
ptrdiff_t __len = __sm.second - __sm.first;
@@ -1781,6 +1789,11 @@ void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
__s.__node_ = nullptr;
}
} else {
+ if constexpr (_UnmatchedAlwaysSucceed) {
+ __s.__do_ = __state::__accept_but_not_consume;
+ __s.__node_ = this->first();
+ return;
+ }
__not_equal:
__s.__do_ = __state::__reject;
__s.__node_ = nullptr;
@@ -2565,6 +2578,7 @@ private:
bool __greedy = true);
__bracket_expression<_CharT, _Traits>* __start_matching_list(bool __negate);
void __push_char(value_type __c);
+ template <bool _UnmatchedAlwaysSucceed = false>
void __push_back_ref(int __i);
void __push_alternation(__owns_one_state<_CharT>* __sa, __owns_one_state<_CharT>* __sb);
void __push_begin_marked_subexpression();
@@ -3807,7 +3821,7 @@ basic_regex<_CharT, _Traits>::__parse_decimal_escape(_ForwardIterator __first, _
}
if (__v == 0 || __v > mark_count())
std::__throw_regex_error<regex_constants::error_backref>();
- __push_back_ref(__v);
+ __push_back_ref<true>(__v);
}
}
return __first;
@@ -4149,13 +4163,14 @@ void basic_regex<_CharT, _Traits>::__push_word_boundary(bool __invert) {
}
template <class _CharT, class _Traits>
+template <bool _UnmatchedAlwaysSucceed>
void basic_regex<_CharT, _Traits>::__push_back_ref(int __i) {
if (flags() & icase)
- __end_->first() = new __back_ref_icase<_CharT, _Traits>(__traits_, __i, __end_->first());
+ __end_->first() = new __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
else if (flags() & collate)
- __end_->first() = new __back_ref_collate<_CharT, _Traits>(__traits_, __i, __end_->first());
+ __end_->first() = new __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
else
- __end_->first() = new __back_ref<_CharT>(__i, __end_->first());
+ __end_->first() = new __back_ref<_CharT, _UnmatchedAlwaysSucceed>(__i, __end_->first());
__end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
}
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index 518c27e424484..a6a1a9cf87c24 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -762,6 +762,21 @@ int main(int, char**)
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
+ {
+ std::cmatch m;
+ const char s[] = "a";
+ assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<char>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
{
@@ -1503,6 +1518,21 @@ int main(int, char**)
assert(m.position(0) == 0);
assert(m.str(0) == s);
}
+ {
+ std::wcmatch m;
+ const wchar_t s[] = L"a";
+ assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+ assert(m.size() == 3);
+ assert(!m.prefix().matched);
+ assert(m.prefix().first == s);
+ assert(m.prefix().second == m[0].first);
+ assert(!m.suffix().matched);
+ assert(m.suffix().first == m[0].second);
+ assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+ assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+ assert(m.position(0) == 0);
+ assert(m.str(0) == s);
+ }
#endif // TEST_HAS_NO_WIDE_CHARACTERS
return 0;
More information about the libcxx-commits
mailing list