[libcxx-commits] [libcxx] [libc++] <regex>: Make unmatched backrefs always succeed in ECMAScript mode (PR #154442)

via libcxx-commits libcxx-commits at lists.llvm.org
Tue Aug 19 16:46:49 PDT 2025


https://github.com/SainoNamkho updated https://github.com/llvm/llvm-project/pull/154442

>From d4f97686e86e329ec8f0ec0870b4916dae98782b Mon Sep 17 00:00:00 2001
From: SainoNamkho <23036788+SainoNamkho at users.noreply.github.com>
Date: Wed, 20 Aug 2025 04:07:47 +0800
Subject: [PATCH] [libc++] <regex>: Make unmatched backrefs always succeed in
 ECMAScript mode.

Fix #154408
---
 libcxx/include/regex                          | 41 +++++++++++++------
 .../std/re/re.alg/re.alg.search/ecma.pass.cpp | 30 ++++++++++++++
 2 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/libcxx/include/regex b/libcxx/include/regex
index 9bbc3a69021b9..ba69f5681a5d4 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1668,7 +1668,7 @@ void __end_marked_subexpression<_CharT>::__exec(__state& __s) const {
 
 // __back_ref
 
-template <class _CharT>
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
 class __back_ref : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1682,8 +1682,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT>
-void __back_ref<_CharT>::__exec(__state& __s) const {
+template <class _CharT, bool _UnmatchedAlwaysSucceed>
+void __back_ref<_CharT, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   if (__mexp_ > __s.__sub_matches_.size())
     std::__throw_regex_error<regex_constants::error_backref>();
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
@@ -1697,6 +1697,9 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
       __s.__do_   = __state::__reject;
       __s.__node_ = nullptr;
     }
+  } else if constexpr (_UnmatchedAlwaysSucceed) {
+    __s.__do_   = __state::__accept_but_not_consume;
+    __s.__node_ = this->first();;
   } else {
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -1705,7 +1708,7 @@ void __back_ref<_CharT>::__exec(__state& __s) const {
 
 // __back_ref_icase
 
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
 class __back_ref_icase : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1721,8 +1724,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT, class _Traits>
-void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
   if (__sm.matched) {
     ptrdiff_t __len = __sm.second - __sm.first;
@@ -1739,6 +1742,11 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
       __s.__node_ = nullptr;
     }
   } else {
+    if constexpr (_UnmatchedAlwaysSucceed) {
+      __s.__do_   = __state::__accept_but_not_consume;
+      __s.__node_ = this->first();
+      return;
+    }
   __not_equal:
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -1747,7 +1755,7 @@ void __back_ref_icase<_CharT, _Traits>::__exec(__state& __s) const {
 
 // __back_ref_collate
 
-template <class _CharT, class _Traits>
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
 class __back_ref_collate : public __owns_one_state<_CharT> {
   typedef __owns_one_state<_CharT> base;
 
@@ -1763,8 +1771,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __exec(__state&) const;
 };
 
-template <class _CharT, class _Traits>
-void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
+template <class _CharT, class _Traits, bool _UnmatchedAlwaysSucceed>
+void __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>::__exec(__state& __s) const {
   sub_match<const _CharT*>& __sm = __s.__sub_matches_[__mexp_ - 1];
   if (__sm.matched) {
     ptrdiff_t __len = __sm.second - __sm.first;
@@ -1781,6 +1789,11 @@ void __back_ref_collate<_CharT, _Traits>::__exec(__state& __s) const {
       __s.__node_ = nullptr;
     }
   } else {
+    if constexpr (_UnmatchedAlwaysSucceed) {
+      __s.__do_   = __state::__accept_but_not_consume;
+      __s.__node_ = this->first();
+      return;
+    }
   __not_equal:
     __s.__do_   = __state::__reject;
     __s.__node_ = nullptr;
@@ -2565,6 +2578,7 @@ private:
                    bool __greedy       = true);
   __bracket_expression<_CharT, _Traits>* __start_matching_list(bool __negate);
   void __push_char(value_type __c);
+  template <bool _UnmatchedAlwaysSucceed = false>
   void __push_back_ref(int __i);
   void __push_alternation(__owns_one_state<_CharT>* __sa, __owns_one_state<_CharT>* __sb);
   void __push_begin_marked_subexpression();
@@ -3807,7 +3821,7 @@ basic_regex<_CharT, _Traits>::__parse_decimal_escape(_ForwardIterator __first, _
       }
       if (__v == 0 || __v > mark_count())
         std::__throw_regex_error<regex_constants::error_backref>();
-      __push_back_ref(__v);
+      __push_back_ref<true>(__v);
     }
   }
   return __first;
@@ -4149,13 +4163,14 @@ void basic_regex<_CharT, _Traits>::__push_word_boundary(bool __invert) {
 }
 
 template <class _CharT, class _Traits>
+template <bool _UnmatchedAlwaysSucceed>
 void basic_regex<_CharT, _Traits>::__push_back_ref(int __i) {
   if (flags() & icase)
-    __end_->first() = new __back_ref_icase<_CharT, _Traits>(__traits_, __i, __end_->first());
+    __end_->first() = new __back_ref_icase<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
   else if (flags() & collate)
-    __end_->first() = new __back_ref_collate<_CharT, _Traits>(__traits_, __i, __end_->first());
+    __end_->first() = new __back_ref_collate<_CharT, _Traits, _UnmatchedAlwaysSucceed>(__traits_, __i, __end_->first());
   else
-    __end_->first() = new __back_ref<_CharT>(__i, __end_->first());
+    __end_->first() = new __back_ref<_CharT, _UnmatchedAlwaysSucceed>(__i, __end_->first());
   __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index 518c27e424484..a6a1a9cf87c24 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -762,6 +762,21 @@ int main(int, char**)
         assert(m.position(0) == 0);
         assert(m.str(0) == s);
     }
+    {
+        std::cmatch m;
+        const char s[] = "a";
+        assert(std::regex_search(s, m, std::regex{"(a()|)\\2a"}));
+        assert(m.size() == 3);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == s + std::char_traits<char>::length(s));
+        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+    }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
     {
@@ -1503,6 +1518,21 @@ int main(int, char**)
         assert(m.position(0) == 0);
         assert(m.str(0) == s);
     }
+    {
+        std::wcmatch m;
+        const wchar_t s[] = L"a";
+        assert(std::regex_search(s, m, std::wregex{L"(a()|)\\2a"}));
+        assert(m.size() == 3);
+        assert(!m.prefix().matched);
+        assert(m.prefix().first == s);
+        assert(m.prefix().second == m[0].first);
+        assert(!m.suffix().matched);
+        assert(m.suffix().first == m[0].second);
+        assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
+        assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+        assert(m.position(0) == 0);
+        assert(m.str(0) == s);
+    }
 #endif // TEST_HAS_NO_WIDE_CHARACTERS
 
   return 0;



More information about the libcxx-commits mailing list