[libcxx-commits] [libcxx] [libc++] `std::views::split`: fix handling of empty ranges (LWG4017) (PR #87916)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Apr 7 04:13:54 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Jan Kokemüller (jiixyj)
<details>
<summary>Changes</summary>
Currently, `std::views::split` and `std::views::lazy_split` will result in zero output ranges when given an empty input range. This is inconsistent and confusing behavior. It should instead result in one (empty) output range. There is [LWG4017](<https://cplusplus.github.io/LWG/issue4017>) open for this.
To fix this, it is not enough to initialize the `trailing_empty_` boolean with `cur_ == next_.begin()` as proposed in the LWG issue. Instead, there probably need to be _two_ ways to construct the `split_view::iterator`:
- a `split_view::iterator` that is at the end of the range, but will still produce another, empty, trailing range. Constructing it could look like this: `iterator{*this, end, __find_next(end)}`
- a `split_view::iterator` that is at the end of the range, but will _not_ produce another range (it is "properly" at the end). It could look like this: `iterator{*this, end, std::nullopt}`
This patch takes this approach and refactors the `iterator` constructor to take an `optional` like this:
```c++
constexpr __iterator(
split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, optional<subrange<iterator_t<_View>>> __next);
```
Furthermore, the `split_view` class is refactored from holding both the `next_` `subrange` and the `trailing_empty_` boolean to just hold `next_` which is now an `optional<subrange>`. The class invariant is now such that `next_` holds a value iff there is another element in the `split_view`. Therefore, `operator++` must call `next_.reset()` in exactly those situations where the end of the `split_view` is reached. This simplifies the code quite a bit.
A similar refactoring is done for `lazy_split_view`, but there for the `current_` member which is now an `optional<_MaybeCurrent>`. Its `operator++` is a bit more complex, though, and care must be taken to call `current_.reset()` at all the right places.
After the refactoring, `split_view` does not need its own `sentinel` class but can just use `std::default_sentinel_t`. Therefore, the `range.split/sentinel/ctor.parent.pass.cpp` test could be deleted.
---
Full diff: https://github.com/llvm/llvm-project/pull/87916.diff
6 Files Affected:
- (modified) libcxx/include/__ranges/lazy_split_view.h (+32-23)
- (modified) libcxx/include/__ranges/split_view.h (+14-43)
- (modified) libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp (+17-1)
- (modified) libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp (-14)
- (modified) libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp (+16)
- (removed) libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp (-49)
``````````diff
diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h
index 6aedfdabffe3a8..4f7a4873223497 100644
--- a/libcxx/include/__ranges/lazy_split_view.h
+++ b/libcxx/include/__ranges/lazy_split_view.h
@@ -103,7 +103,9 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
_LIBCPP_HIDE_FROM_ABI constexpr auto begin() {
if constexpr (forward_range<_View>) {
- return __outer_iterator < __simple_view<_View> && __simple_view < _Pattern >> {*this, ranges::begin(__base_)};
+ // clang-format off
+ return __outer_iterator<__simple_view<_View> && __simple_view<_Pattern>>{*this, ranges::begin(__base_)};
+ // clang-format on
} else {
__current_.__emplace(ranges::begin(__base_));
return __outer_iterator<false>{*this};
@@ -119,12 +121,14 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
_LIBCPP_HIDE_FROM_ABI constexpr auto end()
requires forward_range<_View> && common_range<_View>
{
- return __outer_iterator < __simple_view<_View> && __simple_view < _Pattern >> {*this, ranges::end(__base_)};
+ // clang-format off
+ return __outer_iterator<__simple_view<_View> && __simple_view<_Pattern>>{*this, {}};
+ // clang-format on
}
_LIBCPP_HIDE_FROM_ABI constexpr auto end() const {
if constexpr (forward_range<_View> && forward_range<const _View> && common_range<const _View>) {
- return __outer_iterator<true>{*this, ranges::end(__base_)};
+ return __outer_iterator<true>{*this, {}};
} else {
return default_sentinel;
}
@@ -149,22 +153,23 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
using _Parent = __maybe_const<_Const, lazy_split_view>;
using _Base = __maybe_const<_Const, _View>;
- _Parent* __parent_ = nullptr;
- using _MaybeCurrent = _If<forward_range<_View>, iterator_t<_Base>, __empty_cache>;
- _LIBCPP_NO_UNIQUE_ADDRESS _MaybeCurrent __current_ = _MaybeCurrent();
- bool __trailing_empty_ = false;
+ _Parent* __parent_ = nullptr;
+ using _MaybeCurrent = _If<forward_range<_View>, iterator_t<_Base>, __empty_cache>;
+ _LIBCPP_NO_UNIQUE_ADDRESS optional<_MaybeCurrent> __current_ = nullopt;
+ // precondition: `__current_.has_value()`
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto& __current() noexcept {
if constexpr (forward_range<_View>) {
- return __current_;
+ return *__current_;
} else {
return *__parent_->__current_;
}
}
+ // precondition: `__current_.has_value()`
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const auto& __current() const noexcept {
if constexpr (forward_range<_View>) {
- return __current_;
+ return *__current_;
} else {
return *__parent_->__current_;
}
@@ -195,9 +200,9 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
_LIBCPP_HIDE_FROM_ABI constexpr explicit __outer_iterator(_Parent& __parent)
requires(!forward_range<_Base>)
- : __parent_(std::addressof(__parent)) {}
+ : __parent_(std::addressof(__parent)), __current_(in_place) {}
- _LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator(_Parent& __parent, iterator_t<_Base> __current)
+ _LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator(_Parent& __parent, optional<iterator_t<_Base>> __current)
requires forward_range<_Base>
: __parent_(std::addressof(__parent)), __current_(std::move(__current)) {}
@@ -210,14 +215,16 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
_LIBCPP_HIDE_FROM_ABI constexpr __outer_iterator& operator++() {
const auto __end = ranges::end(__parent_->__base_);
if (__current() == __end) {
- __trailing_empty_ = false;
+ __current_.reset();
return *this;
}
const auto [__pbegin, __pend] = ranges::subrange{__parent_->__pattern_};
if (__pbegin == __pend) {
// Empty pattern: split on every element in the input range
- ++__current();
+ if (++__current() == __end) {
+ __current_.reset();
+ }
} else if constexpr (__tiny_range<_Pattern>) {
// One-element pattern: we can use `ranges::find`.
@@ -225,8 +232,8 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
if (__current() != __end) {
// Make sure we point to after the separator we just found.
++__current();
- if (__current() == __end)
- __trailing_empty_ = true;
+ } else {
+ __current_.reset();
}
} else {
@@ -235,12 +242,14 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
const auto [__b, __p] = ranges::mismatch(__current(), __end, __pbegin, __pend);
if (__p == __pend) {
__current() = __b;
- if (__current() == __end) {
- __trailing_empty_ = true;
- }
- break; // The pattern matched; skip it.
+ // The pattern matched; skip it.
+ return *this;
}
} while (++__current() != __end);
+
+ // We arrived at the end of the range without matching the pattern,
+ // so we are done.
+ __current_.reset();
}
return *this;
@@ -260,12 +269,12 @@ class lazy_split_view : public view_interface<lazy_split_view<_View, _Pattern>>
_LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __outer_iterator& __x, const __outer_iterator& __y)
requires forward_range<_Base>
{
- return __x.__current_ == __y.__current_ && __x.__trailing_empty_ == __y.__trailing_empty_;
+ return __x.__current_ == __y.__current_;
}
_LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __outer_iterator& __x, default_sentinel_t) {
_LIBCPP_ASSERT_NON_NULL(__x.__parent_ != nullptr, "Cannot call comparison on a default-constructed iterator.");
- return __x.__current() == ranges::end(__x.__parent_base()) && !__x.__trailing_empty_;
+ return !__x.__current_.has_value();
}
};
@@ -403,8 +412,8 @@ template <class _Range, class _Pattern>
lazy_split_view(_Range&&, _Pattern&&) -> lazy_split_view<views::all_t<_Range>, views::all_t<_Pattern>>;
template <input_range _Range>
-lazy_split_view(_Range&&, range_value_t<_Range>)
- -> lazy_split_view<views::all_t<_Range>, single_view<range_value_t<_Range>>>;
+lazy_split_view(_Range&&,
+ range_value_t<_Range>) -> lazy_split_view<views::all_t<_Range>, single_view<range_value_t<_Range>>>;
namespace views {
namespace __lazy_split_view {
diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h
index 98f17be04f628f..ba8fbf1343a48f 100644
--- a/libcxx/include/__ranges/split_view.h
+++ b/libcxx/include/__ranges/split_view.h
@@ -15,6 +15,7 @@
#include <__config>
#include <__functional/bind_back.h>
#include <__functional/ranges_operations.h>
+#include <__iterator/default_sentinel.h>
#include <__iterator/indirectly_comparable.h>
#include <__iterator/iterator_traits.h>
#include <__memory/addressof.h>
@@ -58,11 +59,7 @@ class split_view : public view_interface<split_view<_View, _Pattern>> {
template <class, class>
friend struct __iterator;
- template <class, class>
- friend struct __sentinel;
-
struct __iterator;
- struct __sentinel;
_LIBCPP_HIDE_FROM_ABI constexpr subrange<iterator_t<_View>> __find_next(iterator_t<_View> __it) {
auto [__begin, __end] = ranges::search(subrange(__it, ranges::end(__base_)), __pattern_);
@@ -107,7 +104,7 @@ class split_view : public view_interface<split_view<_View, _Pattern>> {
if constexpr (common_range<_View>) {
return __iterator{*this, ranges::end(__base_), {}};
} else {
- return __sentinel{*this};
+ return default_sentinel;
}
}
};
@@ -123,12 +120,9 @@ template <forward_range _View, forward_range _Pattern>
indirectly_comparable<iterator_t<_View>, iterator_t<_Pattern>, ranges::equal_to>
struct split_view<_View, _Pattern>::__iterator {
private:
- split_view* __parent_ = nullptr;
- _LIBCPP_NO_UNIQUE_ADDRESS iterator_t<_View> __cur_ = iterator_t<_View>();
- _LIBCPP_NO_UNIQUE_ADDRESS subrange<iterator_t<_View>> __next_ = subrange<iterator_t<_View>>();
- bool __trailing_empty_ = false;
-
- friend struct __sentinel;
+ split_view* __parent_ = nullptr;
+ _LIBCPP_NO_UNIQUE_ADDRESS iterator_t<_View> __cur_ = iterator_t<_View>();
+ _LIBCPP_NO_UNIQUE_ADDRESS optional<subrange<iterator_t<_View>>> __next_ = nullopt;
public:
using iterator_concept = forward_iterator_tag;
@@ -139,25 +133,20 @@ struct split_view<_View, _Pattern>::__iterator {
_LIBCPP_HIDE_FROM_ABI __iterator() = default;
_LIBCPP_HIDE_FROM_ABI constexpr __iterator(
- split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, subrange<iterator_t<_View>> __next)
+ split_view<_View, _Pattern>& __parent, iterator_t<_View> __current, optional<subrange<iterator_t<_View>>> __next)
: __parent_(std::addressof(__parent)), __cur_(std::move(__current)), __next_(std::move(__next)) {}
_LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> base() const { return __cur_; }
- _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { return {__cur_, __next_.begin()}; }
+ _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { return {__cur_, __next_->begin()}; }
_LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
- __cur_ = __next_.begin();
+ __cur_ = __next_->begin();
if (__cur_ != ranges::end(__parent_->__base_)) {
- __cur_ = __next_.end();
- if (__cur_ == ranges::end(__parent_->__base_)) {
- __trailing_empty_ = true;
- __next_ = {__cur_, __cur_};
- } else {
- __next_ = __parent_->__find_next(__cur_);
- }
+ __cur_ = __next_->end();
+ __next_ = __parent_->__find_next(__cur_);
} else {
- __trailing_empty_ = false;
+ __next_.reset();
}
return *this;
}
@@ -169,29 +158,11 @@ struct split_view<_View, _Pattern>::__iterator {
}
_LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) {
- return __x.__cur_ == __y.__cur_ && __x.__trailing_empty_ == __y.__trailing_empty_;
- }
-};
-
-template <forward_range _View, forward_range _Pattern>
- requires view<_View> && view<_Pattern> &&
- indirectly_comparable<iterator_t<_View>, iterator_t<_Pattern>, ranges::equal_to>
-struct split_view<_View, _Pattern>::__sentinel {
-private:
- _LIBCPP_NO_UNIQUE_ADDRESS sentinel_t<_View> __end_ = sentinel_t<_View>();
-
- _LIBCPP_HIDE_FROM_ABI static constexpr bool __equals(const __iterator& __x, const __sentinel& __y) {
- return __x.__cur_ == __y.__end_ && !__x.__trailing_empty_;
+ return __x.__cur_ == __y.__cur_ && __x.__next_.has_value() == __y.__next_.has_value();
}
-public:
- _LIBCPP_HIDE_FROM_ABI __sentinel() = default;
-
- _LIBCPP_HIDE_FROM_ABI constexpr explicit __sentinel(split_view<_View, _Pattern>& __parent)
- : __end_(ranges::end(__parent.__base_)) {}
-
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __sentinel& __y) {
- return __equals(__x, __y);
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, default_sentinel_t) {
+ return !__x.__next_.has_value();
}
};
diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
index f4e87bb47399ef..e90232f818f4d9 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp
@@ -213,6 +213,22 @@ constexpr bool test_string_literals() {
assert(test_with_piping("", long_sep, expected));
}
+ // Splitting an empty `string_view` should result in one (empty) range.
+ // See LWG issue 4017.
+ {
+ std::array expected = {std::string_view()};
+
+ assert(test_function_call(std::string_view(), short_sep, expected));
+ assert(test_with_piping(std::string_view(), short_sep, expected));
+ assert(test_function_call(std::string_view(), long_sep, expected));
+ assert(test_with_piping(std::string_view(), long_sep, expected));
+
+ assert(test_function_call(std::string_view(""), short_sep, expected));
+ assert(test_with_piping(std::string_view(""), short_sep, expected));
+ assert(test_function_call(std::string_view(""), long_sep, expected));
+ assert(test_with_piping(std::string_view(""), long_sep, expected));
+ }
+
// Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
// are treated differently due to the presence of an implicit `\0` in the latter.
{
@@ -367,7 +383,7 @@ constexpr bool main_test() {
// Empty input.
{
- std::array<std::string_view, 0> expected = {};
+ std::array expected = {""sv};
test_one(""sv, short_sep, expected);
test_one(""sv, long_sep, expected);
}
diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
index 8d5982d59759dd..350fc82d854eaf 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/view_interface.pass.cpp
@@ -31,13 +31,6 @@ constexpr bool test() {
std::ranges::lazy_split_view v("abc def", " ");
assert(!v.empty());
}
-
- {
- // Note: an empty string literal would still produce a non-empty output because the terminating zero is treated as
- // a separate character; hence the use of `string_view`.
- std::ranges::lazy_split_view v(""sv, "");
- assert(v.empty());
- }
}
// operator bool()
@@ -46,13 +39,6 @@ constexpr bool test() {
std::ranges::lazy_split_view v("abc", "");
assert(v);
}
-
- {
- // Note: an empty string literal would still produce a non-empty output because the terminating zero is treated as
- // a separate character; hence the use of `string_view`.
- std::ranges::lazy_split_view v(""sv, "");
- assert(!v);
- }
}
// front()
diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
index 5389d931f840e7..8b1303cfddf378 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp
@@ -161,6 +161,22 @@ constexpr bool test_string_literals() {
assert(test_with_piping("", long_sep, expected));
}
+ // Splitting an empty `string_view` should result in one (empty) range.
+ // See LWG issue 4017.
+ {
+ std::array expected = {std::string_view()};
+
+ assert(test_function_call(std::string_view(), short_sep, expected));
+ assert(test_with_piping(std::string_view(), short_sep, expected));
+ assert(test_function_call(std::string_view(), long_sep, expected));
+ assert(test_with_piping(std::string_view(), long_sep, expected));
+
+ assert(test_function_call(std::string_view(""), short_sep, expected));
+ assert(test_with_piping(std::string_view(""), short_sep, expected));
+ assert(test_function_call(std::string_view(""), long_sep, expected));
+ assert(test_with_piping(std::string_view(""), long_sep, expected));
+ }
+
// Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "`
// are treated differently due to the presence of an implicit `\0` in the latter.
{
diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp
deleted file mode 100644
index c89b1ee2bdfce5..00000000000000
--- a/libcxx/test/std/ranges/range.adaptors/range.split/sentinel/ctor.parent.pass.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-
-// constexpr explicit sentinel(split_view& parent);
-
-#include <cassert>
-#include <ranges>
-#include <type_traits>
-
-#include "test_iterators.h"
-
-// test explicit
-using Range = std::ranges::subrange<int*, sentinel_wrapper<int*>>;
-using SplitView = std::ranges::split_view<Range, std::ranges::single_view<int>>;
-using SplitSent = std::ranges::sentinel_t<SplitView>;
-
-static_assert(std::is_constructible_v<SplitSent, SplitView&>);
-static_assert(!std::is_convertible_v<SplitView&, SplitSent>);
-
-constexpr bool test() {
- {
- int buffer[] = {0, 1, 2};
- Range input{buffer, sentinel_wrapper<int*>(buffer + 3)};
- SplitView sv(input, -1);
- auto it = sv.begin();
-
- SplitSent sent(sv);
- assert(sent != it);
-
- ++it;
- assert(sent == it);
- }
-
- return true;
-}
-
-int main(int, char**) {
- test();
- static_assert(test());
-
- return 0;
-}
``````````
</details>
https://github.com/llvm/llvm-project/pull/87916
More information about the libcxx-commits
mailing list