[libcxx-commits] [libcxx] [libc++] Optimize {std, ranges}::distance for segmented iterators (PR #133612)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Mar 29 20:32:41 PDT 2025
https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/133612
This patch improves the performance of `std::distance` and `std::ranges::distance` for non-random-access segmented iterators. The original implementation runs in linear time, `O(n)`, where `n` is the total number of elements. The optimized version reduces this to approximately `O(n / segment_size)`, where `segment_size` is the size of each segment, by computing the distance one segment at a time; this bound applies when each segment's local iterators are random-access, so that the distance within a segment is obtained in constant time.
The table below summarizes the peak performance improvements observed across different segment sizes, with n ranging up to `1 << 20` (1,048,576 elements), based on benchmark results.
```
----------------------------------------------------------------------------
Container/n/segment_size std::distance std::ranges::distance
----------------------------------------------------------------------------
vector<vector<int>>/1048576/256 401.6x 422.9x
deque<deque<int>>/1048576/256 112.1x 132.6x
vector<vector<int>>/1048576/1024 1669.2x 1559.1x
deque<deque<int>>/1048576/1024 487.7x 497.4x
```
## Benchmarks
#### Segment size = 1024
```
-----------------------------------------------------------------------------------------
Benchmark Before After Speedup
-----------------------------------------------------------------------------------------
std::distance(join_view(vector<vector<int>>))/50 38.8 ns 1.01 ns 38.4x
std::distance(join_view(vector<vector<int>>))/1024 660 ns 1.02 ns 647.1x
std::distance(join_view(vector<vector<int>>))/4096 2934 ns 1.98 ns 1481.8x
std::distance(join_view(vector<vector<int>>))/8192 5751 ns 3.92 ns 1466.8x
std::distance(join_view(vector<vector<int>>))/16384 11520 ns 7.06 ns 1631.7x
std::distance(join_view(vector<vector<int>>))/65536 46367 ns 32.2 ns 1440.6x
std::distance(join_view(vector<vector<int>>))/262144 182611 ns 114 ns 1601.9x
std::distance(join_view(vector<vector<int>>))/1048576 737785 ns 442 ns 1669.2x
std::distance(join_view(deque<deque<int>>))/50 53.1 ns 6.13 ns 8.7x
std::distance(join_view(deque<deque<int>>))/1024 854 ns 7.53 ns 113.4x
std::distance(join_view(deque<deque<int>>))/4096 3507 ns 14.7 ns 238.6x
std::distance(join_view(deque<deque<int>>))/8192 7114 ns 17.6 ns 404.2x
std::distance(join_view(deque<deque<int>>))/16384 13997 ns 30.7 ns 455.9x
std::distance(join_view(deque<deque<int>>))/65536 55598 ns 114 ns 487.7x
std::distance(join_view(deque<deque<int>>))/262144 214293 ns 480 ns 446.4x
std::distance(join_view(deque<deque<int>>))/1048576 833000 ns 2183 ns 381.6x
rng::distance(join_view(vector<vector<int>>))/50 39.1 ns 1.10 ns 35.5x
rng::distance(join_view(vector<vector<int>>))/1024 689 ns 1.14 ns 604.4x
rng::distance(join_view(vector<vector<int>>))/4096 2753 ns 2.15 ns 1280.5x
rng::distance(join_view(vector<vector<int>>))/8192 5530 ns 4.61 ns 1199.6x
rng::distance(join_view(vector<vector<int>>))/16384 10968 ns 7.97 ns 1376.2x
rng::distance(join_view(vector<vector<int>>))/65536 46009 ns 35.3 ns 1303.4x
rng::distance(join_view(vector<vector<int>>))/262144 190569 ns 124 ns 1536.9x
rng::distance(join_view(vector<vector<int>>))/1048576 746724 ns 479 ns 1559.1x
rng::distance(join_view(deque<deque<int>>))/50 51.6 ns 6.57 ns 7.9x
rng::distance(join_view(deque<deque<int>>))/1024 826 ns 6.50 ns 127.1x
rng::distance(join_view(deque<deque<int>>))/4096 3323 ns 12.5 ns 265.8x
rng::distance(join_view(deque<deque<int>>))/8192 6619 ns 19.1 ns 346.5x
rng::distance(join_view(deque<deque<int>>))/16384 13495 ns 33.2 ns 406.5x
rng::distance(join_view(deque<deque<int>>))/65536 53668 ns 114 ns 470.8x
rng::distance(join_view(deque<deque<int>>))/262144 236277 ns 475 ns 497.4x
rng::distance(join_view(deque<deque<int>>))/1048576 914177 ns 2157 ns 423.8x
-----------------------------------------------------------------------------------------
```
#### Segment size = 256
```
-----------------------------------------------------------------------------------------
Benchmark Before After Speedup
-----------------------------------------------------------------------------------------
std::distance(join_view(vector<vector<int>>))/50 38.1 ns 1.02 ns 37.4x
std::distance(join_view(vector<vector<int>>))/1024 689 ns 2.06 ns 334.5x
std::distance(join_view(vector<vector<int>>))/4096 2815 ns 7.01 ns 401.6x
std::distance(join_view(vector<vector<int>>))/8192 5507 ns 14.3 ns 385.1x
std::distance(join_view(vector<vector<int>>))/16384 11050 ns 33.7 ns 327.9x
std::distance(join_view(vector<vector<int>>))/65536 44197 ns 118 ns 374.6x
std::distance(join_view(vector<vector<int>>))/262144 175793 ns 449 ns 391.5x
std::distance(join_view(vector<vector<int>>))/1048576 703242 ns 2140 ns 328.7x
std::distance(join_view(deque<deque<int>>))/50 50.2 ns 6.12 ns 8.2x
std::distance(join_view(deque<deque<int>>))/1024 835 ns 11.4 ns 73.2x
std::distance(join_view(deque<deque<int>>))/4096 3353 ns 32.9 ns 101.9x
std::distance(join_view(deque<deque<int>>))/8192 6711 ns 64.2 ns 104.5x
std::distance(join_view(deque<deque<int>>))/16384 13231 ns 118 ns 112.1x
std::distance(join_view(deque<deque<int>>))/65536 53523 ns 556 ns 96.3x
std::distance(join_view(deque<deque<int>>))/262144 219101 ns 2166 ns 101.2x
std::distance(join_view(deque<deque<int>>))/1048576 880277 ns 15852 ns 55.5x
rng::distance(join_view(vector<vector<int>>))/50 37.7 ns 1.13 ns 33.4x
rng::distance(join_view(vector<vector<int>>))/1024 697 ns 2.14 ns 325.7x
rng::distance(join_view(vector<vector<int>>))/4096 2804 ns 7.52 ns 373.0x
rng::distance(join_view(vector<vector<int>>))/8192 5749 ns 15.2 ns 378.2x
rng::distance(join_view(vector<vector<int>>))/16384 11742 ns 34.8 ns 337.4x
rng::distance(join_view(vector<vector<int>>))/65536 47274 ns 116 ns 407.7x
rng::distance(join_view(vector<vector<int>>))/262144 187774 ns 444 ns 422.9x
rng::distance(join_view(vector<vector<int>>))/1048576 749724 ns 2109 ns 355.5x
rng::distance(join_view(deque<deque<int>>))/50 53.0 ns 6.09 ns 8.7x
rng::distance(join_view(deque<deque<int>>))/1024 895 ns 11.0 ns 81.4x
rng::distance(join_view(deque<deque<int>>))/4096 3825 ns 30.6 ns 125.0x
rng::distance(join_view(deque<deque<int>>))/8192 7550 ns 60.5 ns 124.8x
rng::distance(join_view(deque<deque<int>>))/16384 14847 ns 112 ns 132.6x
rng::distance(join_view(deque<deque<int>>))/65536 56888 ns 453 ns 125.6x
rng::distance(join_view(deque<deque<int>>))/262144 231395 ns 2034 ns 113.8x
rng::distance(join_view(deque<deque<int>>))/1048576 933093 ns 15012 ns 62.2x
-----------------------------------------------------------------------------------------
```
>From fda3246c0fb004732d29038c5a14c0c3217f1559 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 29 Mar 2025 22:29:33 -0400
Subject: [PATCH] Optimize {std,ranges}::distance for segmented iterators
---
libcxx/include/__iterator/distance.h | 41 ++++++---
.../benchmarks/iterators/distance.bench.cpp | 71 +++++++++++++++
.../iterator.operations/distance.pass.cpp | 89 ++++++++++++++-----
3 files changed, 170 insertions(+), 31 deletions(-)
create mode 100644 libcxx/test/benchmarks/iterators/distance.bench.cpp
diff --git a/libcxx/include/__iterator/distance.h b/libcxx/include/__iterator/distance.h
index 1732aa527f64a..726db9cdbc87c 100644
--- a/libcxx/include/__iterator/distance.h
+++ b/libcxx/include/__iterator/distance.h
@@ -10,14 +10,17 @@
#ifndef _LIBCPP___ITERATOR_DISTANCE_H
#define _LIBCPP___ITERATOR_DISTANCE_H
+#include <__algorithm/for_each_segment.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/size.h>
#include <__type_traits/decay.h>
+#include <__type_traits/enable_if.h>
#include <__type_traits/remove_cvref.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -26,25 +29,38 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _InputIter>
+template <class _InputIter, class _Sent>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
-__distance(_InputIter __first, _InputIter __last, input_iterator_tag) {
+__distance(_InputIter __first, _Sent __last) {
typename iterator_traits<_InputIter>::difference_type __r(0);
for (; __first != __last; ++__first)
++__r;
return __r;
}
-template <class _RandIter>
+template < class _RandIter, __enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_RandIter>::difference_type
-__distance(_RandIter __first, _RandIter __last, random_access_iterator_tag) {
+__distance(_RandIter __first, _RandIter __last) {
return __last - __first;
}
+template <class _SegmentedIter,
+ __enable_if_t<!__has_random_access_iterator_category<_SegmentedIter>::value &&
+ __is_segmented_iterator<_SegmentedIter>::value,
+ int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_SegmentedIter>::difference_type
+__distance(_SegmentedIter __first, _SegmentedIter __last) {
+ typename iterator_traits<_SegmentedIter>::difference_type __r(0);
+ std::__for_each_segment(__first, __last, [&__r](auto __lfirst, auto __llast) {
+ __r += std::__distance(__lfirst, __llast);
+ });
+ return __r;
+}
+
template <class _InputIter>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 typename iterator_traits<_InputIter>::difference_type
distance(_InputIter __first, _InputIter __last) {
- return std::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category());
+ return std::__distance(__first, __last);
}
#if _LIBCPP_STD_VER >= 20
@@ -56,12 +72,17 @@ struct __distance {
template <class _Ip, sentinel_for<_Ip> _Sp>
requires(!sized_sentinel_for<_Sp, _Ip>)
_LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const {
- iter_difference_t<_Ip> __n = 0;
- while (__first != __last) {
- ++__first;
- ++__n;
+ if constexpr (assignable_from<_Ip&, _Sp> && __is_segmented_iterator<_Ip>::value) {
+ auto __iter = std::move(__last);
+ return std::__distance(__first, __iter);
+ } else {
+ iter_difference_t<_Ip> __n = 0;
+ while (__first != __last) {
+ ++__first;
+ ++__n;
+ }
+ return __n;
}
- return __n;
}
template <class _Ip, sized_sentinel_for<decay_t<_Ip>> _Sp>
diff --git a/libcxx/test/benchmarks/iterators/distance.bench.cpp b/libcxx/test/benchmarks/iterators/distance.bench.cpp
new file mode 100644
index 0000000000000..db9e6a9201658
--- /dev/null
+++ b/libcxx/test/benchmarks/iterators/distance.bench.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <cstddef>
+#include <deque>
+#include <iterator>
+#include <ranges>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+int main(int argc, char** argv) {
+ auto std_distance = [](auto first, auto last) { return std::distance(first, last); };
+
+ // {std,ranges}::distance
+ {
+ auto bm = []<class Container>(std::string name, auto distance, std::size_t seg_size) {
+ benchmark::RegisterBenchmark(
+ name,
+ [distance, seg_size](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ Container c(segments);
+ for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
+ c[i].resize(std::min(seg_size, n));
+ }
+
+ auto view = c | std::views::join;
+ auto first = view.begin();
+ auto last = view.end();
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = distance(first, last);
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(4096)
+ ->Arg(8192)
+ ->Arg(1 << 14)
+ ->Arg(1 << 16)
+ ->Arg(1 << 18)
+ ->Arg(1 << 20);
+ };
+ bm.operator()<std::vector<std::vector<int>>>("std::distance(join_view(vector<vector<int>>))", std_distance, 256);
+ bm.operator()<std::deque<std::deque<int>>>("std::distance(join_view(deque<deque<int>>))", std_distance, 256);
+ bm.operator()<std::vector<std::vector<int>>>(
+ "rng::distance(join_view(vector<vector<int>>)", std::ranges::distance, 256);
+ bm.operator()<std::deque<std::deque<int>>>(
+ "rng::distance(join_view(deque<deque<int>>)", std::ranges::distance, 256);
+
+ // bm.operator()<std::vector<std::vector<int>>>("std::distance(join_view(vector<vector<int>>))", std_distance, 1024);
+ // bm.operator()<std::deque<std::deque<int>>>("std::distance(join_view(deque<deque<int>>))", std_distance, 1024);
+ // bm.operator()<std::vector<std::vector<int>>>("rng::distance(join_view(vector<vector<int>>)", std::ranges::distance, 1024);
+ // bm.operator()<std::deque<std::deque<int>>>("rng::distance(join_view(deque<deque<int>>)", std::ranges::distance, 1024);
+ }
+
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+ return 0;
+}
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
index 13caefff92365..d3ff573575c1f 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
@@ -16,38 +16,85 @@
// Iter::difference_type
// distance(Iter first, Iter last); // constexpr in C++17
-#include <iterator>
+#include <array>
#include <cassert>
+#include <deque>
+#include <iterator>
+#include <vector>
#include <type_traits>
#include "test_macros.h"
#include "test_iterators.h"
template <class It>
-TEST_CONSTEXPR_CXX17
-void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist)
-{
- typedef typename std::iterator_traits<It>::difference_type Difference;
- static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
- assert(std::distance(first, last) == dist);
+TEST_CONSTEXPR_CXX17 void check_distance(It first, It last, typename std::iterator_traits<It>::difference_type dist) {
+ typedef typename std::iterator_traits<It>::difference_type Difference;
+ static_assert(std::is_same<decltype(std::distance(first, last)), Difference>::value, "");
+ assert(std::distance(first, last) == dist);
}
-TEST_CONSTEXPR_CXX17 bool tests()
-{
- const char* s = "1234567890";
- check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s+10), 10);
- check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s+10), 10);
- check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s+10), 10);
- check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+10), 10);
- check_distance(s, s+10, 10);
- return true;
+#if TEST_STD_VER >= 20
+template <class It>
+TEST_CONSTEXPR_CXX20 void check_ranges_distance(It first, It last, std::iter_difference_t<It> dist) {
+ using Difference = std::iter_difference_t<It>;
+ static_assert(std::is_same<decltype(std::ranges::distance(first, last)), Difference>::value, "");
+ assert(std::ranges::distance(first, last) == Difference(dist));
+}
+#endif
+
+TEST_CONSTEXPR_CXX17 bool tests() {
+ const char* s = "1234567890";
+ check_distance(cpp17_input_iterator<const char*>(s), cpp17_input_iterator<const char*>(s + 10), 10);
+ check_distance(forward_iterator<const char*>(s), forward_iterator<const char*>(s + 10), 10);
+ check_distance(bidirectional_iterator<const char*>(s), bidirectional_iterator<const char*>(s + 10), 10);
+ check_distance(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s + 10), 10);
+ check_distance(s, s + 10, 10);
+
+#if TEST_STD_VER >= 20
+ check_ranges_distance(forward_iterator(s), forward_iterator(s + 10), 10);
+ check_ranges_distance(bidirectional_iterator(s), bidirectional_iterator(s + 10), 10);
+ check_ranges_distance(random_access_iterator(s), random_access_iterator(s + 10), 10);
+ check_ranges_distance(s, s + 10, 10);
+
+ {
+ using Container = std::vector<std::vector<int>>;
+ Container c;
+ auto view = c | std::views::join;
+ Container::difference_type n = 0;
+ for (std::size_t i = 0; i < 10; ++i) {
+ n += i;
+ c.push_back(Container::value_type(i));
+ }
+ assert(std::distance(view.begin(), view.end()) == n);
+ assert(std::ranges::distance(view.begin(), view.end()) == n);
+ }
+ {
+ using Container = std::array<std::array<char, 3>, 10>;
+ Container c;
+ auto view = c | std::views::join;
+ assert(std::distance(view.begin(), view.end()) == 30);
+ assert(std::ranges::distance(view.begin(), view.end()) == 30);
+ }
+ if (!TEST_IS_CONSTANT_EVALUATED) {
+ using Container = std::deque<std::deque<double>>;
+ Container c;
+ auto view = c | std::views::join;
+ Container::difference_type n = 0;
+ for (std::size_t i = 0; i < 10; ++i) {
+ n += i;
+ c.push_back(Container::value_type(i));
+ }
+ assert(std::distance(view.begin(), view.end()) == n);
+ assert(std::ranges::distance(view.begin(), view.end()) == n);
+ }
+#endif
+ return true;
}
-int main(int, char**)
-{
- tests();
+int main(int, char**) {
+ tests();
#if TEST_STD_VER >= 17
- static_assert(tests(), "");
+ static_assert(tests(), "");
#endif
- return 0;
+ return 0;
}
More information about the libcxx-commits
mailing list