[libcxx-commits] [libcxx] [libc++] Optimize std::{, ranges}::{fill, fill_n} for segmented iterators (PR #132665)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Jun 25 10:38:04 PDT 2025
https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/132665
>From cb5382d279becd64a94fbc8925b5ba97256b4bee Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 14 Jun 2025 12:29:42 -0400
Subject: [PATCH 1/2] Optimize {std,ranges}::{fill,fill_n} for segmented
iterators
---
libcxx/docs/ReleaseNotes/21.rst | 4 ++
libcxx/include/__algorithm/fill.h | 38 +++++++++++----
libcxx/include/__algorithm/fill_n.h | 48 +++++++++++++++----
libcxx/include/__algorithm/ranges_fill.h | 13 ++---
.../alg.fill/fill.pass.cpp | 23 +++++++++
.../alg.fill/fill_n.pass.cpp | 23 +++++++++
.../alg.fill/ranges.fill.pass.cpp | 22 +++++++++
.../alg.fill/ranges.fill_n.pass.cpp | 22 +++++++++
8 files changed, 169 insertions(+), 24 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index c52bc54f412bd..527fe713ef0a2 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -81,6 +81,10 @@ Improvements and New Features
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
+- The ``std::{fill, fill_n}`` and ``std::ranges::{fill, fill_n}`` algorithms have been optimized for segmented
+ iterators, resulting in a performance improvement of at least 10x for ``std::deque<int>`` iterators and for
+ iterators of a ``std::ranges::join_view`` over ``std::vector<std::vector<int>>``.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 1ce3eadb013d0..c77ae9a661704 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -10,8 +10,11 @@
#define _LIBCPP___ALGORITHM_FILL_H
#include <__algorithm/fill_n.h>
+#include <__algorithm/for_each_segment.h>
#include <__config>
#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -21,23 +24,42 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
-template <class _ForwardIterator, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) {
+template <class _ForwardIterator, class _Sentinel, class _Tp>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
+__fill(_ForwardIterator __first, _Sentinel __last, const _Tp& __value) {
for (; __first != __last; ++__first)
*__first = __value;
+ return __first;
}
-template <class _RandomAccessIterator, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) {
- std::fill_n(__first, __last - __first, __value);
+template <class _RandomAccessIterator,
+ class _Tp,
+ __enable_if_t<__has_random_access_iterator_category<_RandomAccessIterator>::value &&
+ !__is_segmented_iterator<_RandomAccessIterator>::value,
+ int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
+__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
+ return std::__fill_n(__first, __last - __first, __value);
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+template <class _SegmentedIterator,
+ class _Tp,
+ __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
+__fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
+ std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__fill(__lfirst, __llast, __value);
+ });
+ return __last;
}
+#endif // !_LIBCPP_CXX03_LANG
template <class _ForwardIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
- std::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category());
+ std::__fill(__first, __last, __value);
}
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index 0da78e1f38c4c..7c932b1d026a8 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -9,10 +9,16 @@
#ifndef _LIBCPP___ALGORITHM_FILL_N_H
#define _LIBCPP___ALGORITHM_FILL_N_H
+#include <__algorithm/for_each_n_segment.h>
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
#include <__memory/pointer_traits.h>
+#include <__type_traits/disjunction.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -26,9 +32,39 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
-template <class _OutputIterator, class _Size, class _Tp>
+template <class _OutputIterator,
+ class _Size,
+ class _Tp
+#ifndef _LIBCPP_CXX03_LANG
+ ,
+ __enable_if_t<_Or< _Not<__is_segmented_iterator<_OutputIterator> >,
+ _Not<__has_random_access_local_iterator<_OutputIterator> > >::value,
+ int> = 0
+#endif
+ >
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
+__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
+ for (; __n > 0; ++__first, (void)--__n)
+ *__first = __value;
+ return __first;
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+template < class _OutputIterator,
+ class _Size,
+ class _Tp,
+ __enable_if_t<__is_segmented_iterator<_OutputIterator>::value &&
+ __has_random_access_iterator_category<
+ typename __segmented_iterator_traits<_OutputIterator>::__local_iterator>::value,
+ int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _OutputIterator
+__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
+ using __local_iterator_t = typename __segmented_iterator_traits<_OutputIterator>::__local_iterator;
+ return std::__for_each_n_segment(__first, __n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
+ std::__fill_n(__lfirst, __llast - __lfirst, __value);
+ });
+}
+#endif // !_LIBCPP_CXX03_LANG
template <bool _FillVal, class _Cp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
@@ -68,14 +104,6 @@ __fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
return __first + __n;
}
-template <class _OutputIterator, class _Size, class _Tp>
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
-__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
- for (; __n > 0; ++__first, (void)--__n)
- *__first = __value;
- return __first;
-}
-
template <class _OutputIterator, class _Size, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
diff --git a/libcxx/include/__algorithm/ranges_fill.h b/libcxx/include/__algorithm/ranges_fill.h
index c248009f98fe3..814ae6363fcf0 100644
--- a/libcxx/include/__algorithm/ranges_fill.h
+++ b/libcxx/include/__algorithm/ranges_fill.h
@@ -9,12 +9,14 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H
#define _LIBCPP___ALGORITHM_RANGES_FILL_H
-#include <__algorithm/ranges_fill_n.h>
+#include <__algorithm/fill.h>
+#include <__algorithm/fill_n.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
+#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -31,12 +33,11 @@ namespace ranges {
struct __fill {
template <class _Type, output_iterator<const _Type&> _Iter, sentinel_for<_Iter> _Sent>
_LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const {
- if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) {
- return ranges::fill_n(__first, __last - __first, __value);
+ if constexpr (sized_sentinel_for<_Sent, _Iter>) {
+ auto __n = __last - __first;
+ return std::__fill_n(std::move(__first), __n, __value);
} else {
- for (; __first != __last; ++__first)
- *__first = __value;
- return __first;
+ return std::__fill(std::move(__first), std::move(__last), __value);
}
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
index 9b403db85ebf9..b0a74b81f8c0f 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
@@ -17,6 +17,8 @@
#include <array>
#include <cassert>
#include <cstddef>
+#include <deque>
+#include <ranges>
#include <vector>
#include "sized_allocator.h"
@@ -93,6 +95,13 @@ TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
return true;
}
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+ std::deque<int> in(20);
+ std::deque<int> expected(in.size(), 42);
+ std::fill(in.begin(), in.end(), 42);
+ assert(in == expected);
+}
+
TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<char*>(), Test<char>());
types::for_each(types::forward_iterator_list<int*>(), Test<int>());
@@ -138,6 +147,20 @@ TEST_CONSTEXPR_CXX20 bool test() {
}
}
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_deque();
+
+#if TEST_STD_VER >= 20
+ { // Verify that a join_view over a vector of vectors works properly.
+ std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
+ auto jv = std::ranges::join_view(v);
+ std::fill(jv.begin(), jv.end(), 42);
+ for (const auto& vec : v)
+ for (auto n : vec)
+ assert(n == 42);
+ }
+#endif
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
index e42172ceb960d..5dc9b901db075 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
@@ -17,6 +17,8 @@
#include <array>
#include <cassert>
#include <cstddef>
+#include <deque>
+#include <ranges>
#include <vector>
#include "sized_allocator.h"
@@ -126,6 +128,13 @@ TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
return true;
}
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+ std::deque<int> in(20);
+ std::deque<int> expected(in.size(), 42);
+ std::fill_n(in.begin(), in.size(), 42);
+ assert(in == expected);
+}
+
TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<char*>(), Test<char>());
types::for_each(types::forward_iterator_list<int*>(), Test<int>());
@@ -221,6 +230,20 @@ TEST_CONSTEXPR_CXX20 bool test() {
}
}
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_deque();
+
+#if TEST_STD_VER >= 20
+ {
+ std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
+ auto jv = std::ranges::join_view(v);
+ std::fill_n(jv.begin(), std::distance(jv.begin(), jv.end()), 42);
+ for (const auto& vec : v)
+ for (auto n : vec)
+ assert(n == 42);
+ }
+#endif
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
index 61a659fb0028c..7ae0a0665dfbb 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
@@ -18,6 +18,7 @@
#include <algorithm>
#include <array>
#include <cassert>
+#include <deque>
#include <ranges>
#include <string>
#include <vector>
@@ -128,6 +129,13 @@ constexpr bool test_vector_bool(std::size_t N) {
}
#endif
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+ std::deque<int> in(20);
+ std::deque<int> expected(in.size(), 42);
+ std::ranges::fill(in, 42);
+ assert(in == expected);
+}
+
constexpr bool test() {
test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
@@ -227,6 +235,20 @@ constexpr bool test() {
}
#endif
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_deque();
+
+#if TEST_STD_VER >= 20
+ {
+ std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
+ auto jv = std::ranges::join_view(v);
+ std::ranges::fill(jv, 42);
+ for (const auto& vec : v)
+ for (auto n : vec)
+ assert(n == 42);
+ }
+#endif
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
index 2d6e24a03e0b3..25db892548a6a 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
@@ -16,6 +16,7 @@
#include <algorithm>
#include <array>
#include <cassert>
+#include <deque>
#include <ranges>
#include <string>
#include <vector>
@@ -101,6 +102,13 @@ constexpr bool test_vector_bool(std::size_t N) {
}
#endif
+/*TEST_CONSTEXPR_CXX26*/ void test_deque() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr
+ std::deque<int> in(20);
+ std::deque<int> expected(in.size(), 42);
+ std::ranges::fill_n(std::ranges::begin(in), std::ranges::size(in), 42);
+ assert(in == expected);
+}
+
constexpr bool test() {
test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
@@ -175,6 +183,20 @@ constexpr bool test() {
}
#endif
+ if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr
+ test_deque();
+
+#if TEST_STD_VER >= 20
+ {
+ std::vector<std::vector<int>> v{{1, 2}, {1, 2, 3}, {}, {3, 4, 5}, {6}, {7, 8, 9, 6}, {0, 1, 2, 3, 0, 1, 2}};
+ auto jv = std::ranges::join_view(v);
+ std::ranges::fill_n(std::ranges::begin(jv), std::ranges::distance(jv), 42);
+ for (const auto& vec : v)
+ for (auto n : vec)
+ assert(n == 42);
+ }
+#endif
+
return true;
}
>From f780dcc443b8d5edfab50cd293751ee8b1d85d07 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Wed, 25 Jun 2025 12:30:09 -0400
Subject: [PATCH 2/2] Improve readability of SFINAE for std::__fill_n
---
libcxx/include/__algorithm/fill_n.h | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index 7c932b1d026a8..fe929b7a7bc2d 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -16,7 +16,7 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/segmented_iterator.h>
#include <__memory/pointer_traits.h>
-#include <__type_traits/disjunction.h>
+#include <__type_traits/conjunction.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/negation.h>
#include <__utility/convert_to_integral.h>
@@ -37,8 +37,8 @@ template <class _OutputIterator,
class _Tp
#ifndef _LIBCPP_CXX03_LANG
,
- __enable_if_t<_Or< _Not<__is_segmented_iterator<_OutputIterator> >,
- _Not<__has_random_access_local_iterator<_OutputIterator> > >::value,
+ __enable_if_t<!_And<__is_segmented_iterator<_OutputIterator>,
+ __has_random_access_local_iterator<_OutputIterator> >::value,
int> = 0
#endif
>
@@ -53,9 +53,8 @@ __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
template < class _OutputIterator,
class _Size,
class _Tp,
- __enable_if_t<__is_segmented_iterator<_OutputIterator>::value &&
- __has_random_access_iterator_category<
- typename __segmented_iterator_traits<_OutputIterator>::__local_iterator>::value,
+ __enable_if_t<_And<__is_segmented_iterator<_OutputIterator>,
+ __has_random_access_local_iterator<_OutputIterator> >::value,
int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _OutputIterator
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
More information about the libcxx-commits mailing list