[libcxx-commits] [libcxx] [libc++][regex] Correctly adjust match prefix for zero-length matches. (PR #94550)
Konstantin Varlamov via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Jun 5 23:49:46 PDT 2024
================
@@ -17,102 +17,149 @@
#include <iterator>
#include "test_macros.h"
-int main(int, char**)
-{
- {
- std::regex phone_numbers("\\d{3}-\\d{4}");
- const char phone_book[] = "555-1234, 555-2345, 555-3456";
- std::cregex_iterator i(std::begin(phone_book), std::end(phone_book), phone_numbers);
- std::cregex_iterator i2 = i;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 0);
- assert((*i).str() == "555-1234");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- i++;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 10);
- assert((*i).str() == "555-2345");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- i++;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 20);
- assert((*i).str() == "555-3456");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- i++;
- assert(i == std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- }
- {
- std::regex phone_numbers("\\d{3}-\\d{4}");
- const char phone_book[] = "555-1234, 555-2345, 555-3456";
- std::cregex_iterator i(std::begin(phone_book), std::end(phone_book), phone_numbers);
- std::cregex_iterator i2 = i;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 0);
- assert((*i).str() == "555-1234");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- ++i;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 10);
- assert((*i).str() == "555-2345");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- ++i;
- assert(i != std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i).size() == 1);
- assert((*i).position() == 20);
- assert((*i).str() == "555-3456");
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- ++i;
- assert(i == std::cregex_iterator());
- assert(i2!= std::cregex_iterator());
- assert((*i2).size() == 1);
- assert((*i2).position() == 0);
- assert((*i2).str() == "555-1234");
- }
- { // https://llvm.org/PR33681
- std::regex rex(".*");
- const char foo[] = "foo";
+int main(int, char**) {
+ {
+ std::regex phone_numbers("\\d{3}-\\d{4}");
+ const char phone_book[] = "555-1234, 555-2345, 555-3456";
+ std::cregex_iterator i(std::begin(phone_book), std::end(phone_book), phone_numbers);
+ std::cregex_iterator i2 = i;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 0);
+ assert((*i).str() == "555-1234");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ i++;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 10);
+ assert((*i).str() == "555-2345");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ i++;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 20);
+ assert((*i).str() == "555-3456");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ i++;
+ assert(i == std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ }
+ {
+ std::regex phone_numbers("\\d{3}-\\d{4}");
+ const char phone_book[] = "555-1234, 555-2345, 555-3456";
+ std::cregex_iterator i(std::begin(phone_book), std::end(phone_book), phone_numbers);
+ std::cregex_iterator i2 = i;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 0);
+ assert((*i).str() == "555-1234");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ ++i;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 10);
+ assert((*i).str() == "555-2345");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ ++i;
+ assert(i != std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i).size() == 1);
+ assert((*i).position() == 20);
+ assert((*i).str() == "555-3456");
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ ++i;
+ assert(i == std::cregex_iterator());
+ assert(i2 != std::cregex_iterator());
+ assert((*i2).size() == 1);
+ assert((*i2).position() == 0);
+ assert((*i2).str() == "555-1234");
+ }
+ { // https://llvm.org/PR33681
+ std::regex rex(".*");
+ const char foo[] = "foo";
// The -1 is because we don't want the implicit null from the array.
- std::cregex_iterator i(std::begin(foo), std::end(foo) - 1, rex);
- std::cregex_iterator e;
- assert(i != e);
- assert((*i).size() == 1);
- assert((*i).str() == "foo");
-
- ++i;
- assert(i != e);
- assert((*i).size() == 1);
- assert((*i).str() == "");
-
- ++i;
- assert(i == e);
- }
+ std::cregex_iterator i(std::begin(foo), std::end(foo) - 1, rex);
+ std::cregex_iterator e;
+ assert(i != e);
+ assert((*i).size() == 1);
+ assert((*i).str() == "foo");
+
+ ++i;
+ assert(i != e);
+ assert((*i).size() == 1);
+ assert((*i).str() == "");
+
+ ++i;
+ assert(i == e);
+ }
+
+ {
+ // Check that we correctly adjust the match prefix when dealing with zero-length matches -- this is explicitly
+ // required by the Standard ([re.regiter.incr]: "In all cases in which the call to `regex_search` returns true,
+ // `match.prefix().first` shall be equal to the previous value of `match[0].second`"). For a pattern that matches
+ // empty sequences, there is an implicit zero-length match between every character in a string -- make sure the
+ // prefix of each of these matches (except the first one) is the preceding character.
+
+ auto validate = [](const std::regex& empty_matching_pattern) {
----------------
var-const wrote:
Update: I didn't realize that we support `<regex>` in C++03 mode, where lambdas (and `auto`) are not available. Would you prefer that I change this to a helper function, or restrict the test to C++11 and above (perhaps by splitting it into a separate file)?
https://github.com/llvm/llvm-project/pull/94550
More information about the libcxx-commits mailing list