[libcxx-commits] [libcxx] [libc++] `std::views::split`: fix handling of empty ranges (LWG4017) (PR #87916)

Jan Kokemüller via libcxx-commits libcxx-commits at lists.llvm.org
Sun Apr 7 04:13:27 PDT 2024


https://github.com/jiixyj created https://github.com/llvm/llvm-project/pull/87916

Currently, `std::views::split` and `std::views::lazy_split` will result in zero output ranges when given an empty input range. This is inconsistent and confusing behavior. It should instead result in one (empty) output range. There is [LWG4017](<https://cplusplus.github.io/LWG/issue4017>) open for this.

To fix this, it is not enough to initialize the `trailing_empty_` boolean with `cur_ == next_.begin()` as proposed in the LWG issue. Instead, there probably need to be _two_ ways to construct the `split_view::iterator`:

- a `split_view::iterator` that is at the end of the range, but will still produce another, empty, trailing range. Constructing it could look like this: `iterator{*this, end, __find_next(end)}`
- a `split_view::iterator` that is at the end of the range, but will _not_ produce another range (it is "properly" at the end). It could look like this: `iterator{*this, end, std::nullopt}`

This patch takes this approach and refactors the `iterator` constructor to take an `optional` like this:

```c++
constexpr __iterator(
      split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, optional<subrange<iterator_t<_View>>> __next);
```

Furthermore, the `split_view` class is refactored from holding both the `next_` `subrange` and the `trailing_empty_` boolean to just hold `next_` which is now an `optional<subrange>`. The class invariant is now such that `next_` holds a value iff there is another element in the `split_view`. Therefore, `operator++` must call `next_.reset()` in exactly those situations where the end of the `split_view` is reached. This simplifies the code quite a bit.

A similar refactoring is done for `lazy_split_view`, but there for the `current_` member which is now an `optional<_MaybeCurrent>`. Its `operator++` is a bit more complex, though, and care must be taken to call `current_.reset()` at all the right places.

After the refactoring, `split_view` does not need its own `sentinel` class but can just use `std::default_sentinel_t`. Therefore, the `range.split/sentinel/ctor.parent.pass.cpp` test could be deleted.

>From d11a134fbdf9d4b81597d2ffa8e5494b06934091 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Kokem=C3=BCller?= <jan.kokemueller at gmail.com>
Date: Sat, 6 Apr 2024 16:17:59 +0200
Subject: [PATCH] make 'views::split' return an empty range when given an empty
 range

---
 libcxx/include/__ranges/lazy_split_view.h     | 55 ++++++++++--------
 libcxx/include/__ranges/split_view.h          | 57 +++++--------------
 .../range.lazy.split/general.pass.cpp         | 18 +++++-
 .../range.lazy.split/view_interface.pass.cpp  | 14 -----
 .../range.split/general.pass.cpp              | 16 ++++++
 .../range.split/sentinel/ctor.parent.pass.cpp | 49 ----------------
 6 files changed, 79 insertions(+), 130 deletions(-)
 delete mode 100644 libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp

diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h
index 6aedfdabffe3a8..4f7a4873223497 100644
--- a/libcxx/include/__ranges/lazy_split_view.h
+++ b/libcxx/include/__ranges/lazy_split_view.h
@@ -103,7 +103,9 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
     if constexpr (forward_range<_View>) {
-      return __outer_iterator < __simple_view<_View> && __simple_view < _Pattern >> {*this, ranges::begin(__base_)};
+      // clang-format off
+      return __outer_iterator<__simple_view<_View> && __simple_view<_Pattern>>{*this, ranges::begin(__base_)};
+      // clang-format on
     } else {
       __current_.__emplace(ranges::begin(__base_));
       return __outer_iterator<false>{*this};
@@ -119,12 +121,14 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
   _LIBCPP_HIDE_FROM_ABI constexpr auto end()
     requires forward_range<_View> && common_range<_View>
   {
-    return __outer_iterator < __simple_view<_View> && __simple_view < _Pattern >> {*this, ranges::end(__base_)};
+    // clang-format off
+    return __outer_iterator<__simple_view<_View> && __simple_view<_Pattern>>{*this, {}};
+    // clang-format on
   }
 
   _LIBCPP_HIDE_FROM_ABI constexpr auto end() const {
     if constexpr (forward_range<_View> && forward_range<const _View> && common_range<const _View>) {
-      return __outer_iterator<true>{*this, ranges::end(__base_)};
+      return __outer_iterator<true>{*this, {}};
     } else {
       return default_sentinel;
     }
@@ -149,22 +153,23 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
     using _Parent = __maybe_const<_Const, lazy_split_view>;
     using _Base   = __maybe_const<_Const, _View>;
 
-    _Parent* __parent_                                 = nullptr;
-    using _MaybeCurrent                                = _If<forward_range<_View>, iterator_t<_Base>, __empty_cache>;
-    _LIBCPP_NO_UNIQUE_ADDRESS _MaybeCurrent __current_ = _MaybeCurrent();
-    bool __trailing_empty_                             = false;
+    _Parent* __parent_  = nullptr;
+    using _MaybeCurrent = _If<forward_range<_View>, iterator_t<_Base>, __empty_cache>;
+    _LIBCPP_NO_UNIQUE_ADDRESS optional<_MaybeCurrent> __current_ = nullopt;
 
+    // precondition: `__current_.has_value()`
     [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto& __current() noexcept {
       if constexpr (forward_range<_View>) {
-        return __current_;
+        return *__current_;
       } else {
         return *__parent_->__current_;
       }
     }
 
+    // precondition: `__current_.has_value()`
     [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const auto& __current() const noexcept {
       if constexpr (forward_range<_View>) {
-        return __current_;
+        return *__current_;
       } else {
         return *__parent_->__current_;
       }
@@ -195,9 +200,9 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
 
     _LIBCPP_HIDE_FROM_ABI constexpr explicit __outer_iterator(_Parent& __parent)
       requires(!forward_range<_Base>)
-        : __parent_(std::addressof(__parent)) {}
+        : __parent_(std::addressof(__parent)), __current_(in_place) {}
 
-    _LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator(_Parent& __parent, iterator_t<_Base> __current)
+    _LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator(_Parent& __parent, optional<iterator_t<_Base>> __current)
       requires forward_range<_Base>
         : __parent_(std::addressof(__parent)), __current_(std::move(__current)) {}
 
@@ -210,14 +215,16 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
     _LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator& operator++() {
       const auto __end = ranges::end(__parent_->__base_);
       if (__current() == __end) {
-        __trailing_empty_ = false;
+        __current_.reset();
         return *this;
       }
 
       const auto [__pbegin, __pend] = ranges::subrange{__parent_->__pattern_};
       if (__pbegin == __pend) {
         // Empty pattern: split on every element in the input range
-        ++__current();
+        if (++__current() == __end) {
+          __current_.reset();
+        }
 
       } else if constexpr (__tiny_range<_Pattern>) {
         // One-element pattern: we can use `ranges::find`.
@@ -225,8 +232,8 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
         if (__current() != __end) {
           // Make sure we point to after the separator we just found.
           ++__current();
-          if (__current() == __end)
-            __trailing_empty_ = true;
+        } else {
+          __current_.reset();
         }
 
       } else {
@@ -235,12 +242,14 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
           const auto [__b, __p] = ranges::mismatch(__current(), __end, __pbegin, __pend);
           if (__p == __pend) {
             __current() = __b;
-            if (__current() == __end) {
-              __trailing_empty_ = true;
-            }
-            break; // The pattern matched; skip it.
+            // The pattern matched; skip it.
+            return *this;
           }
         } while (++__current() != __end);
+
+        // We arrived at the end of the range without matching the pattern,
+        // so we are done.
+        __current_.reset();
       }
 
       return *this;
@@ -260,12 +269,12 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
     _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __outer_iterator& __x, const __outer_iterator& __y)
       requires forward_range<_Base>
     {
-      return __x.__current_ == __y.__current_ && __x.__trailing_empty_ == __y.__trailing_empty_;
+      return __x.__current_ == __y.__current_;
     }
 
     _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __outer_iterator& __x, default_sentinel_t) {
       _LIBCPP_ASSERT_NON_NULL(__x.__parent_ != nullptr, "Cannot call comparison on a default-constructed iterator.");
-      return __x.__current() == ranges::end(__x.__parent_base()) && !__x.__trailing_empty_;
+      return !__x.__current_.has_value();
     }
   };
 
@@ -403,8 +412,8 @@ template <class _Range, class _Pattern>
 lazy_split_view(_Range&&, _Pattern&&) -> lazy_split_view<views::all_t<_Range>, views::all_t<_Pattern>>;
 
 template <input_range _Range>
-lazy_split_view(_Range&&, range_value_t<_Range>)
-    -> lazy_split_view<views::all_t<_Range>, single_view<range_value_t<_Range>>>;
+lazy_split_view(_Range&&,
+                range_value_t<_Range>) -> lazy_split_view<views::all_t<_Range>, single_view<range_value_t<_Range>>>;
 
 namespace views {
 namespace __lazy_split_view {
diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h
index 98f17be04f628f..ba8fbf1343a48f 100644
--- a/libcxx/include/__ranges/split_view.h
+++ b/libcxx/include/__ranges/split_view.h
@@ -15,6 +15,7 @@
 #include <__config>
 #include <__functional/bind_back.h>
 #include <__functional/ranges_operations.h>
+#include <__iterator/default_sentinel.h>
 #include <__iterator/indirectly_comparable.h>
 #include <__iterator/iterator_traits.h>
 #include <__memory/addressof.h>
@@ -58,11 +59,7 @@ class split_view : public view_interface<split_view<_View, _Pattern>> {
   template <class, class>
   friend struct __iterator;
 
-  template <class, class>
-  friend struct __sentinel;
-
   struct __iterator;
-  struct __sentinel;
 
   _LIBCPP_HIDE_FROM_ABI constexpr subrange<iterator_t<_View>> __find_next(iterator_t<_View> __it) {
     auto [__begin, __end] = ranges::search(subrange(__it, ranges::end(__base_)), __pattern_);
@@ -107,7 +104,7 @@ class split_view : public view_interface<split_view<_View, _Pattern>> {
     if constexpr (common_range<_View>) {
       return __iterator{*this, ranges::end(__base_), {}};
     } else {
-      return __sentinel{*this};
+      return default_sentinel;
     }
   }
 };
@@ -123,12 +120,9 @@ template <forward_range _View, forward_range _Pattern>
            indirectly_comparable<iterator_t<_View>, iterator_t<_Pattern>, ranges::equal_to>
 struct split_view<_View, _Pattern>::__iterator {
 private:
-  split_view* __parent_                                         = nullptr;
-  _LIBCPP_NO_UNIQUE_ADDRESS iterator_t<_View> __cur_            = iterator_t<_View>();
-  _LIBCPP_NO_UNIQUE_ADDRESS subrange<iterator_t<_View>> __next_ = subrange<iterator_t<_View>>();
-  bool __trailing_empty_                                        = false;
-
-  friend struct __sentinel;
+  split_view* __parent_                                                   = nullptr;
+  _LIBCPP_NO_UNIQUE_ADDRESS iterator_t<_View> __cur_                      = iterator_t<_View>();
+  _LIBCPP_NO_UNIQUE_ADDRESS optional<subrange<iterator_t<_View>>> __next_ = nullopt;
 
 public:
   using iterator_concept  = forward_iterator_tag;
@@ -139,25 +133,20 @@ struct split_view<_View, _Pattern>::__iterator {
   _LIBCPP_HIDE_FROM_ABI __iterator() = default;
 
   _LIBCPP_HIDE_FROM_ABI constexpr __iterator(
-      split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, subrange<iterator_t<_View>> __next)
+      split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, optional<subrange<iterator_t<_View>>> __next)
       : __parent_(std::addressof(__parent)), __cur_(std::move(__current)), __next_(std::move(__next)) {}
 
   _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() const { return __cur_; }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { return {__cur_, __next_.begin()}; }
+  _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { return {__cur_, __next_->begin()}; }
 
   _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
-    __cur_ = __next_.begin();
+    __cur_ = __next_->begin();
     if (__cur_ != ranges::end(__parent_->__base_)) {
-      __cur_ = __next_.end();
-      if (__cur_ == ranges::end(__parent_->__base_)) {
-        __trailing_empty_ = true;
-        __next_           = {__cur_, __cur_};
-      } else {
-        __next_ = __parent_->__find_next(__cur_);
-      }
+      __cur_  = __next_->end();
+      __next_ = __parent_->__find_next(__cur_);
     } else {
-      __trailing_empty_ = false;
+      __next_.reset();
     }
     return *this;
   }
@@ -169,29 +158,11 @@ struct split_view<_View, _Pattern>::__iterator {
   }
 
   _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) {
-    return __x.__cur_ == __y.__cur_ && __x.__trailing_empty_ == __y.__trailing_empty_;
-  }
-};
-
-template <forward_range _View, forward_range _Pattern>
-  requires view<_View> && view<_Pattern> &&
-           indirectly_comparable<iterator_t<_View>, iterator_t<_Pattern>, ranges::equal_to>
-struct split_view<_View, _Pattern>::__sentinel {
-private:
-  _LIBCPP_NO_UNIQUE_ADDRESS sentinel_t<_View> __end_ = sentinel_t<_View>();
-
-  _LIBCPP_HIDE_FROM_ABI static constexpr bool __equals(const __iterator& __x, const __sentinel& __y) {
-    return __x.__cur_ == __y.__end_ && !__x.__trailing_empty_;
+    return __x.__cur_ == __y.__cur_ && __x.__next_.has_value() == __y.__next_.has_value();
   }
 
-public:
-  _LIBCPP_HIDE_FROM_ABI __sentinel() = default;
-
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(split_view<_View, _Pattern>& __parent)
-      : __end_(ranges::end(__parent.__base_)) {}
-
-  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __sentinel& __y) {
-    return __equals(__x, __y);
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, default_sentinel_t) {
+    return !__x.__next_.has_value();
   }
 };
 
diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
index f4e87bb47399ef..e90232f818f4d9 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
@@ -213,6 +213,22 @@ constexpr bool test_string_literals() {
     assert(test_with_piping("", long_sep, expected));
   }
 
+  // Splitting an empty `string_view` should result in one (empty) range.
+  // See LWG issue 4017.
+  {
+    std::array expected = {std::string_view()};
+
+    assert(test_function_call(std::string_view(), short_sep, expected));
+    assert(test_with_piping(std::string_view(), short_sep, expected));
+    assert(test_function_call(std::string_view(), long_sep, expected));
+    assert(test_with_piping(std::string_view(), long_sep, expected));
+
+    assert(test_function_call(std::string_view(""), short_sep, expected));
+    assert(test_with_piping(std::string_view(""), short_sep, expected));
+    assert(test_function_call(std::string_view(""), long_sep, expected));
+    assert(test_with_piping(std::string_view(""), long_sep, expected));
+  }
+
   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
   // are treated differently due to the presence of an implicit `\0` in the latter.
   {
@@ -367,7 +383,7 @@ constexpr bool main_test() {
 
   // Empty input.
   {
-    std::array<std::string_view, 0> expected = {};
+    std::array expected = {""sv};
     test_one(""sv, short_sep, expected);
     test_one(""sv, long_sep, expected);
   }
diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
index 8d5982d59759dd..350fc82d854eaf 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
@@ -31,13 +31,6 @@ constexpr bool test() {
       std::ranges::lazy_split_view v("abc def", " ");
       assert(!v.empty());
     }
-
-    {
-      // Note: an empty string literal would still produce a non-empty output because the terminating zero is treated as
-      // a separate character; hence the use of `string_view`.
-      std::ranges::lazy_split_view v(""sv, "");
-      assert(v.empty());
-    }
   }
 
   // operator bool()
@@ -46,13 +39,6 @@ constexpr bool test() {
       std::ranges::lazy_split_view v("abc", "");
       assert(v);
     }
-
-    {
-      // Note: an empty string literal would still produce a non-empty output because the terminating zero is treated as
-      // a separate character; hence the use of `string_view`.
-      std::ranges::lazy_split_view v(""sv, "");
-      assert(!v);
-    }
   }
 
   // front()
diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
index 5389d931f840e7..8b1303cfddf378 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
@@ -161,6 +161,22 @@ constexpr bool test_string_literals() {
     assert(test_with_piping("", long_sep, expected));
   }
 
+  // Splitting an empty `string_view` should result in one (empty) range.
+  // See LWG issue 4017.
+  {
+    std::array expected = {std::string_view()};
+
+    assert(test_function_call(std::string_view(), short_sep, expected));
+    assert(test_with_piping(std::string_view(), short_sep, expected));
+    assert(test_function_call(std::string_view(), long_sep, expected));
+    assert(test_with_piping(std::string_view(), long_sep, expected));
+
+    assert(test_function_call(std::string_view(""), short_sep, expected));
+    assert(test_with_piping(std::string_view(""), short_sep, expected));
+    assert(test_function_call(std::string_view(""), long_sep, expected));
+    assert(test_with_piping(std::string_view(""), long_sep, expected));
+  }
+
   // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
   // are treated differently due to the presence of an implicit `\0` in the latter.
   {
diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp
deleted file mode 100644
index c89b1ee2bdfce5..00000000000000
--- a/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-
-// constexpr explicit sentinel(split_view& parent);
-
-#include <cassert>
-#include <ranges>
-#include <type_traits>
-
-#include "test_iterators.h"
-
-// test explicit
-using Range     = std::ranges::subrange<int*, sentinel_wrapper<int*>>;
-using SplitView = std::ranges::split_view<Range, std::ranges::single_view<int>>;
-using SplitSent = std::ranges::sentinel_t<SplitView>;
-
-static_assert(std::is_constructible_v<SplitSent, SplitView&>);
-static_assert(!std::is_convertible_v<SplitView&, SplitSent>);
-
-constexpr bool test() {
-  {
-    int buffer[] = {0, 1, 2};
-    Range input{buffer, sentinel_wrapper<int*>(buffer + 3)};
-    SplitView sv(input, -1);
-    auto it = sv.begin();
-
-    SplitSent sent(sv);
-    assert(sent != it);
-
-    ++it;
-    assert(sent == it);
-  }
-
-  return true;
-}
-
-int main(int, char**) {
-  test();
-  static_assert(test());
-
-  return 0;
-}



More information about the libcxx-commits mailing list