[libcxx-commits] [libcxx] [libc++] Fix `match_prev_avail` implementation in `std::regex_search` (PR #79773)
Sanjay Marreddi via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Jan 28 16:33:28 PST 2024
https://github.com/SanjayMarreddi created https://github.com/llvm/llvm-project/pull/79773
The implementation of the `match_prev_avail` flag in `regex_search` regressed after the fix for issue #41544. As a result, many searches return wrong results, especially those involving the `"^"` regex pattern.
Fixes #74838
>From d09c469f717dee5f159787cd9b259c49bceddf2c Mon Sep 17 00:00:00 2001
From: SanjayMarreddi <sanjay.mareddi at gmail.com>
Date: Mon, 29 Jan 2024 00:25:48 +0000
Subject: [PATCH] [libc++] Fix `match_prev_avail` implementation in
`std::regex_search`
---
libcxx/include/regex | 15 +-
.../re.matchflag/match_prev_avail.pass.cpp | 198 +++++++++++++-----
2 files changed, 156 insertions(+), 57 deletions(-)
diff --git a/libcxx/include/regex b/libcxx/include/regex
index 48af5b8b57fd649..e43dc5b54c5975b 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -5100,8 +5100,19 @@ bool basic_regex<_CharT, _Traits>::__search(
const _CharT* __last,
match_results<const _CharT*, _Allocator>& __m,
regex_constants::match_flag_type __flags) const {
- if (__flags & regex_constants::match_prev_avail)
- __flags &= ~(regex_constants::match_not_bol | regex_constants::match_not_bow);
+ if (__flags & regex_constants::match_prev_avail){
+ if (flags() & std::regex_constants::multiline){
+ if (*__first != '\n' && *__first != '\r'){
+ __flags |= std::regex_constants::match_not_bol;
+ }
+ }
+ else{
+ __flags |= std::regex_constants::match_not_bol;
+ }
+ if (isalnum(*__first)) {
+ __flags |= std::regex_constants::match_not_bow;
+ }
+ }
__m.__init(1 + mark_count(), __first, __last, __flags & regex_constants::__no_update_pos);
if (__match_at_start(__first, __last, __m, __flags, !(__flags & regex_constants::__no_update_pos))) {
diff --git a/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
index 508b8dd43be9538..47198bac69b75ee 100644
--- a/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
+++ b/libcxx/test/std/re/re.const/re.matchflag/match_prev_avail.pass.cpp
@@ -18,67 +18,155 @@
#include <cassert>
#include <string>
+template <class It>
+void test(It start,
+ It end,
+ char const* regex,
+ std::regex_constants::match_flag_type flags,
+ bool expect_match,
+ int expect_pos = 0,
+ int expect_len = 0,
+ bool multiline = false) {
+ std::smatch match;
+ std::regex re(regex, multiline ? std::regex::multiline : std::regex::ECMAScript);
+ if (expect_match) {
+ assert(std::regex_search(start, end, match, re, flags));
+ assert(match.position(0) == expect_pos);
+ assert(match.length(0) == expect_len);
+ } else {
+ assert(!std::regex_search(start, end, match, re, flags));
+ }
+}
+
int main(int, char**) {
- char str1[] = "\na";
- auto str1_scnd = str1 + 1;
+ // The implementation of `match_prev_avail` is being corrected as per the discussions in the issue #74838.
+ {
+ std::string s = "ab";
+ test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_default, true, 0, 0);
+ test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_not_bol, false);
+ test(s.cbegin() + 1, s.cend(), "^", std::regex_constants::match_prev_avail, false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^",
+ std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+ false);
+ }
- // Assert that match_prev_avail disables match_not_bol and this matches
- assert(std::regex_match(str1 + 1, str1 + 2, std::regex("^a"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_prev_avail));
- // Manually passing match_prev_avail defines that --str1 is a valid position
- assert(std::regex_match(str1_scnd, std::regex("a"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_prev_avail));
+ {
+ std::string s = "ab";
+ test(s.cbegin(), s.cend(), "^ab", std::regex_constants::match_default, true, 0, 2);
+ test(s.cbegin(), s.cend(), "^ab", std::regex_constants::match_not_bol, false);
+ }
- //Assert that match_prev_avail disables match_not_bow and this matches
- assert(std::regex_search(str1, std::regex("\\ba")));
- assert(std::regex_match(str1 + 1, str1 + 2, std::regex("\\ba\\b"),
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
- assert(std::regex_search(str1_scnd, std::regex("\\ba"),
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
+ {
+ std::string s = "ab";
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_default, true, 0, 1);
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_not_bol, false);
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_prev_avail, false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^b",
+ std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+ false);
+ }
- //Assert that match_prev_avail disables both match_not_bow and match_not_bol
- assert(std::regex_match(str1 + 1, str1 + 2, std::regex("^a"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
- assert(std::regex_match(str1_scnd, std::regex("\\ba"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
+ {
+ std::string s = "ab\nb";
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_default, true, 0, 1, true);
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_not_bol, true, 2, 1, true); // TODO
+ test(s.cbegin() + 1, s.cend(), "^b", std::regex_constants::match_prev_avail, true, 2, 1, true);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^b",
+ std::regex_constants::match_prev_avail | std::regex_constants::match_not_bol,
+ true,
+ 2,
+ 1,
+ true);
+ }
- // pr 42199
- std::string S = " cd";
- std::string::iterator Start = S.begin() + 1;
- std::string::iterator End = S.end();
- assert(std::regex_search(Start, End, std::regex("^cd")));
+ {
+ std::string s = "\na";
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^a",
+ std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+ false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "a",
+ std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+ true,
+ 0,
+ 1);
- assert(!std::regex_search(Start, End, std::regex("^cd"),
- std::regex_constants::match_not_bol));
- assert(!std::regex_search(Start, End, std::regex(".*\\bcd\\b"),
- std::regex_constants::match_not_bow));
- assert(!std::regex_search(Start, End, std::regex("^cd"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_not_bow));
- assert(!std::regex_search(Start, End, std::regex(".*\\bcd\\b"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_not_bow));
+ test(s.cbegin() + 1,
+ s.cend(),
+ "\\ba",
+ std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+ true,
+ 0,
+ 1);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "\\ba\\b",
+ std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+ true,
+ 0,
+ 1);
- assert(std::regex_search(Start, End, std::regex("^cd"),
- std::regex_constants::match_prev_avail));
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^a",
+ std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+ std::regex_constants::match_prev_avail,
+ false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "\\ba",
+ std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+ std::regex_constants::match_prev_avail,
+ true,
+ 0,
+ 1);
+ }
- assert(std::regex_search(Start, End, std::regex("^cd"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_prev_avail));
- assert(std::regex_search(Start, End, std::regex("^cd"),
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
- assert(std::regex_match(Start, End, std::regex("\\bcd\\b"),
- std::regex_constants::match_not_bol |
- std::regex_constants::match_not_bow |
- std::regex_constants::match_prev_avail));
+ {
+ // pr 42199
+ std::string s = " cd";
+ test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_default, true, 0, 2);
+ test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_not_bol, false);
+ test(s.cbegin() + 1, s.cend(), ".*\\bcd\\b", std::regex_constants::match_not_bow, false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^cd",
+ std::regex_constants::match_not_bol | std::regex_constants::match_not_bow,
+ false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ ".*\\bcd\\b",
+ std::regex_constants::match_not_bol | std::regex_constants::match_not_bow,
+ false);
+
+ test(s.cbegin() + 1, s.cend(), "^cd", std::regex_constants::match_prev_avail, false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^cd",
+ std::regex_constants::match_not_bol | std::regex_constants::match_prev_avail,
+ false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "^cd",
+ std::regex_constants::match_not_bow | std::regex_constants::match_prev_avail,
+ false);
+ test(s.cbegin() + 1,
+ s.cend(),
+ "\\bcd\\b",
+ std::regex_constants::match_not_bol | std::regex_constants::match_not_bow |
+ std::regex_constants::match_prev_avail,
+ true,
+ 0,
+ 2);
+ }
return 0;
-}
+}
\ No newline at end of file
More information about the libcxx-commits
mailing list