[libcxx-commits] [libcxx] [libc++] Fix `match_prev_avail` implementation in `std::regex_search` (PR #79773)

Sanjay Marreddi via libcxx-commits libcxx-commits at lists.llvm.org
Sun Jan 28 16:33:28 PST 2024


https://github.com/SanjayMarreddi created https://github.com/llvm/llvm-project/pull/79773

The implementation of the `match_prev_avail` flag in `regex_search` regressed after issue #41544 was fixed. This resulted in many wrong search results, especially those involving the `"^"` regex pattern.

Fixes #74838

>From d09c469f717dee5f159787cd9b259c49bceddf2c Mon Sep 17 00:00:00 2001
From: SanjayMarreddi <sanjay.mareddi at gmail.com>
Date: Mon, 29 Jan 2024 00:25:48 +0000
Subject: [PATCH] [libc++] Fix `match_prev_avail` implementation in
 `std::regex_search`

---
 libcxx/include/regex                          |  15 +-
 .../re.matchflag/match_prev_avail.pass.cpp    | 198 +++++++++++++-----
 2 files changed, 156 insertions(+), 57 deletions(-)

diff --git a/libcxx/include/regex b/libcxx/include/regex
index 48af5b8b57fd649..e43dc5b54c5975b 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -5100,8 +5100,19 @@ bool basic_regex<_CharT, _Traits>::__search(
     const _CharT* __last,
     match_results<const _CharT*, _Allocator>& __m,
     regex_constants::match_flag_type __flags) const {
-  if (__flags & regex_constants::match_prev_avail)
-    __flags &= ~(regex_constants::match_not_bol | regex_constants::match_not_bow);
+  if (__flags & regex_constants::match_prev_avail){
+    if (flags() & std::regex_constants::multiline){
+      if (*__first != '\n'  && *__first != '\r'){
+        __flags |= std::regex_constants::match_not_bol;
+      }
+    }
+    else{
+      __flags |= std::regex_constants::match_not_bol;
+    }
+    if (isalnum(*__first)) {
+      __flags |= std::regex_constants::match_not_bow;
+    }
+  }
 
   __m.__init(1 + mark_count(), __first, __last, __flags & regex_constants::__no_update_pos);
   if (__match_at_start(__first, __last, __m, __flags, !(__flags & regex_constants::__no_update_pos))) {
diff --git a/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
index 508b8dd43be9538..47198bac69b75ee 100644
--- a/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
+++ b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
@@ -18,67 +18,155 @@
 #include <cassert>
 #include <string>
 
+template <class It>
+void test(It start,
+          It end,
+          char const* regex,
+          std::regex_constants::match_flag_type flags,
+          bool expect_match,
+          int expect_pos = 0,
+          int expect_len = 0,
+          bool multiline = false) {
+  std::smatch match;
+  std::regex re(regex, multiline ? std::regex::multiline : std::regex::ECMAScript);
+  if (expect_match) {
+    assert(std::regex_search(start, end, match, re, flags));
+    assert(match.position(0) == expect_pos);
+    assert(match.length(0) == expect_len);
+  } else {
+    assert(!std::regex_search(start, end, match, re, flags));
+  }
+}
+
 int main(int, char**) {
-  char str1[] = "\na";
-  auto str1_scnd = str1 + 1;
+  // The implementation of `match_prev_avail` is being corrected as per the discussions in the issue #74838.
+  {
+    std::string s = "ab";
+    test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_default, true, 0, 0);
+    test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_not_bol, false);
+    test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_prev_avail, false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^",
+         std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+         false);
+  }
 
-  // Assert that match_prev_avail disables match_not_bol and this matches
-  assert(std::regex_match(str1 + 1, str1 + 2, std::regex("^a"),
-                     std::regex_constants::match_not_bol |
-                         std::regex_constants::match_prev_avail));
-  // Manually passing match_prev_avail defines that --str1 is a valid position
-  assert(std::regex_match(str1_scnd, std::regex("a"),
-                     std::regex_constants::match_not_bol |
-                         std::regex_constants::match_prev_avail));
+  {
+    std::string s = "ab";
+    test(s.cbegin(), s.cend(), "^ab", std::regex_constants::match_default, true, 0, 2);
+    test(s.cbegin(), s.cend(), "^ab", std::regex_constants::match_not_bol, false);
+  }
 
-  //Assert that match_prev_avail disables match_not_bow and this matches
-  assert(std::regex_search(str1, std::regex("\\ba")));
-  assert(std::regex_match(str1 + 1, str1 + 2, std::regex("\\ba\\b"),
-                     std::regex_constants::match_not_bow |
-                         std::regex_constants::match_prev_avail));
-  assert(std::regex_search(str1_scnd, std::regex("\\ba"),
-                      std::regex_constants::match_not_bow |
-                          std::regex_constants::match_prev_avail));
+  {
+    std::string s = "ab";
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_default, true, 0, 1);
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_not_bol, false);
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_prev_avail, false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^b",
+         std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+         false);
+  }
 
-  //Assert that match_prev_avail disables both match_not_bow and match_not_bol
-  assert(std::regex_match(str1 + 1, str1 + 2, std::regex("^a"),
-                     std::regex_constants::match_not_bol |
-                         std::regex_constants::match_not_bow |
-                         std::regex_constants::match_prev_avail));
-  assert(std::regex_match(str1_scnd, std::regex("\\ba"),
-                     std::regex_constants::match_not_bol |
-                         std::regex_constants::match_not_bow |
-                         std::regex_constants::match_prev_avail));
+  {
+    std::string s = "ab\nb";
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_default, true, 0, 1, true);
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_not_bol, true, 2, 1, true); // TODO
+    test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_prev_avail, true, 2, 1, true);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^b",
+         std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+         true,
+         2,
+         1,
+         true);
+  }
 
-  // pr 42199
-  std::string S = " cd";
-  std::string::iterator Start = S.begin() + 1;
-  std::string::iterator End = S.end();
-  assert(std::regex_search(Start, End, std::regex("^cd")));
+  {
+    std::string s = "\na";
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^a",
+         std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+         false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "a",
+         std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+         true,
+         0,
+         1);
 
-  assert(!std::regex_search(Start, End, std::regex("^cd"),
-            std::regex_constants::match_not_bol));
-  assert(!std::regex_search(Start, End, std::regex(".*\\bcd\\b"),
-            std::regex_constants::match_not_bow));
-  assert(!std::regex_search(Start, End, std::regex("^cd"),
-            std::regex_constants::match_not_bol |
-            std::regex_constants::match_not_bow));
-  assert(!std::regex_search(Start, End, std::regex(".*\\bcd\\b"),
-            std::regex_constants::match_not_bol |
-            std::regex_constants::match_not_bow));
+    test(s.cbegin() + 1,
+         s.cend(),
+         "\\ba",
+         std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+         true,
+         0,
+         1);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "\\ba\\b",
+         std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+         true,
+         0,
+         1);
 
-  assert(std::regex_search(Start, End, std::regex("^cd"),
-            std::regex_constants::match_prev_avail));
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^a",
+         std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+             std::regex_constants::match_prev_avail,
+         false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "\\ba",
+         std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+             std::regex_constants::match_prev_avail,
+         true,
+         0,
+         1);
+  }
 
-  assert(std::regex_search(Start, End, std::regex("^cd"),
-            std::regex_constants::match_not_bol |
-            std::regex_constants::match_prev_avail));
-  assert(std::regex_search(Start, End, std::regex("^cd"),
-            std::regex_constants::match_not_bow |
-            std::regex_constants::match_prev_avail));
-  assert(std::regex_match(Start, End, std::regex("\\bcd\\b"),
-            std::regex_constants::match_not_bol |
-            std::regex_constants::match_not_bow |
-            std::regex_constants::match_prev_avail));
+  {
+    // pr 42199
+    std::string s = " cd";
+    test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_default, true, 0, 2);
+    test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_not_bol, false);
+    test(s.cbegin() + 1, s.cend(), ".*\\bcd\\b", std::regex_constants::match_not_bow, false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^cd",
+         std::regex_constants::match_not_bol | std::regex_constants::match_not_bow,
+         false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         ".*\\bcd\\b",
+         std::regex_constants::match_not_bol | std::regex_constants::match_not_bow,
+         false);
+
+    test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_prev_avail, false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^cd",
+         std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+         false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "^cd",
+         std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+         false);
+    test(s.cbegin() + 1,
+         s.cend(),
+         "\\bcd\\b",
+         std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+             std::regex_constants::match_prev_avail,
+         true,
+         0,
+         2);
+  }
   return 0;
-}
+}
\ No newline at end of file



More information about the libcxx-commits mailing list