[libcxx-commits] [libcxx] [libc++][ranges] optimize the performance of `ranges::starts_with` (PR #84570)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Mar 9 00:15:11 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Xiaoyang Liu (xiaoyang-sde)
<details>
<summary>Changes</summary>
## Abstract
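This pull request optimizes `ranges::starts_with`: when both ranges have sized sentinels, it returns early if the prefix is empty or longer than the input, and for random-access iterators it delegates to `ranges::equal` instead of `ranges::mismatch`.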
This pull request is related to issue #83880.
## Benchmarking
```txt
2024-03-08T23:40:18-08:00
Running ./ranges_starts_with.libcxx.out
Run on (12 X 2496 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x6)
L1 Instruction 32 KiB (x6)
L2 Unified 1280 KiB (x6)
L3 Unified 18432 KiB (x1)
Load Average: 0.79, 1.93, 2.27
-----------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------------------------------------------------------------
bm_starts_with_contiguous_iter_with_memcmp_optimization/16 1.50 ns 1.50 ns 460164239
bm_starts_with_contiguous_iter_with_memcmp_optimization/256 12.5 ns 12.5 ns 55148246
bm_starts_with_contiguous_iter_with_memcmp_optimization/4096 189 ns 189 ns 4030953
bm_starts_with_contiguous_iter_with_memcmp_optimization/65536 4889 ns 4889 ns 157791
bm_starts_with_contiguous_iter_with_memcmp_optimization/1048576 138974 ns 138964 ns 5810
bm_starts_with_contiguous_iter_with_memcmp_optimization/16777216 3874632 ns 3874633 ns 180
bm_starts_with_contiguous_iter/16 6.55 ns 6.55 ns 90161221
bm_starts_with_contiguous_iter/256 99.8 ns 99.8 ns 7093729
bm_starts_with_contiguous_iter/4096 1505 ns 1488 ns 420866
bm_starts_with_contiguous_iter/65536 24836 ns 24836 ns 29657
bm_starts_with_contiguous_iter/1048576 400852 ns 400847 ns 1700
bm_starts_with_contiguous_iter/16777216 7017180 ns 7017118 ns 101
bm_starts_with_random_access_iter/16 6.86 ns 6.86 ns 99695785
bm_starts_with_random_access_iter/256 102 ns 102 ns 6721592
bm_starts_with_random_access_iter/4096 1537 ns 1537 ns 478604
bm_starts_with_random_access_iter/65536 23564 ns 23564 ns 30089
bm_starts_with_random_access_iter/1048576 382977 ns 382985 ns 1850
bm_starts_with_random_access_iter/16777216 6952078 ns 6952032 ns 99
bm_starts_with_bidirectional_iter/16 6.23 ns 6.23 ns 111062808
bm_starts_with_bidirectional_iter/256 103 ns 103 ns 6981875
bm_starts_with_bidirectional_iter/4096 1498 ns 1468 ns 451407
bm_starts_with_bidirectional_iter/65536 23994 ns 23994 ns 30011
bm_starts_with_bidirectional_iter/1048576 381783 ns 381781 ns 1838
bm_starts_with_bidirectional_iter/16777216 7538205 ns 7538213 ns 103
bm_starts_with_forward_iter/16 6.37 ns 6.37 ns 103694333
bm_starts_with_forward_iter/256 103 ns 103 ns 7017987
bm_starts_with_forward_iter/4096 1535 ns 1535 ns 483272
bm_starts_with_forward_iter/65536 23208 ns 23208 ns 28449
bm_starts_with_forward_iter/1048576 370772 ns 370773 ns 1812
bm_starts_with_forward_iter/16777216 6758327 ns 6758279 ns 101
```
---
Full diff: https://github.com/llvm/llvm-project/pull/84570.diff
5 Files Affected:
- (modified) libcxx/benchmarks/CMakeLists.txt (+1)
- (added) libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp (+107)
- (modified) libcxx/include/__algorithm/ranges_starts_with.h (+59-23)
- (modified) libcxx/include/__functional/operations.h (+9)
- (modified) libcxx/include/__functional/ranges_operations.h (+5)
``````````diff
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index b436e96f178b70..621b295a92c8d2 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -186,6 +186,7 @@ set(BENCHMARK_TESTS
algorithms/pstl.stable_sort.bench.cpp
algorithms/push_heap.bench.cpp
algorithms/ranges_contains.bench.cpp
+ algorithms/ranges_starts_with.bench.cpp
algorithms/ranges_ends_with.bench.cpp
algorithms/ranges_make_heap.bench.cpp
algorithms/ranges_make_heap_then_sort_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
new file mode 100644
index 00000000000000..9986fdb1684fc7
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <functional>
+#include <vector>
+
+#include "test_iterators.h"
+
+static void bm_starts_with_contiguous_iter_with_memcmp_optimization(benchmark::State& state) {
+ std::vector<int> a(state.range(), 1);
+ std::vector<int> p(state.range(), 1);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(a);
+ benchmark::DoNotOptimize(p);
+
+ auto begin1 = contiguous_iterator(a.data());
+ auto end1 = contiguous_iterator(a.data() + a.size());
+ auto begin2 = contiguous_iterator(p.data());
+ auto end2 = contiguous_iterator(p.data() + p.size());
+
+ benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+ }
+}
+BENCHMARK(bm_starts_with_contiguous_iter_with_memcmp_optimization)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_contiguous_iter(benchmark::State& state) {
+ std::vector<int> a(state.range(), 1);
+ std::vector<int> p(state.range(), 1);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(a);
+ benchmark::DoNotOptimize(p);
+
+ auto begin1 = contiguous_iterator(a.data());
+ auto end1 = contiguous_iterator(a.data() + a.size());
+ auto begin2 = contiguous_iterator(p.data());
+ auto end2 = contiguous_iterator(p.data() + p.size());
+
+ benchmark::DoNotOptimize(
+ std::ranges::starts_with(begin1, end1, begin2, end2, [](const int a, const int b) { return a == b; }));
+ }
+}
+BENCHMARK(bm_starts_with_contiguous_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_random_access_iter(benchmark::State& state) {
+ std::vector<int> a(state.range(), 1);
+ std::vector<int> p(state.range(), 1);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(a);
+ benchmark::DoNotOptimize(p);
+
+ auto begin1 = random_access_iterator(a.data());
+ auto end1 = random_access_iterator(a.data() + a.size());
+ auto begin2 = random_access_iterator(p.data());
+ auto end2 = random_access_iterator(p.data() + p.size());
+
+ benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+ }
+}
+BENCHMARK(bm_starts_with_random_access_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_bidirectional_iter(benchmark::State& state) {
+ std::vector<int> a(state.range(), 1);
+ std::vector<int> p(state.range(), 1);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(a);
+ benchmark::DoNotOptimize(p);
+
+ auto begin1 = bidirectional_iterator(a.data());
+ auto end1 = bidirectional_iterator(a.data() + a.size());
+ auto begin2 = bidirectional_iterator(p.data());
+ auto end2 = bidirectional_iterator(p.data() + p.size());
+
+ benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+ }
+}
+BENCHMARK(bm_starts_with_bidirectional_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_forward_iter(benchmark::State& state) {
+ std::vector<int> a(state.range(), 1);
+ std::vector<int> p(state.range(), 1);
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(a);
+ benchmark::DoNotOptimize(p);
+
+ auto begin1 = forward_iterator(a.data());
+ auto end1 = forward_iterator(a.data() + a.size());
+ auto begin2 = forward_iterator(p.data());
+ auto end2 = forward_iterator(p.data() + p.size());
+
+ benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+ }
+}
+BENCHMARK(bm_starts_with_forward_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/ranges_starts_with.h b/libcxx/include/__algorithm/ranges_starts_with.h
index 90e184aa9bccc2..ebcc15555f5544 100644
--- a/libcxx/include/__algorithm/ranges_starts_with.h
+++ b/libcxx/include/__algorithm/ranges_starts_with.h
@@ -9,12 +9,14 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
#define _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
-#include <__algorithm/in_in_result.h>
+#include <__algorithm/ranges_equal.h>
#include <__algorithm/ranges_mismatch.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/ranges_operations.h>
+#include <__functional/reference_wrapper.h>
#include <__iterator/concepts.h>
+#include <__iterator/distance.h>
#include <__iterator/indirectly_comparable.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
@@ -34,6 +36,48 @@ _LIBCPP_BEGIN_NAMESPACE_STD
namespace ranges {
namespace __starts_with {
struct __fn {
+ template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
+ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool __starts_with_impl(
+ _Iter1 __first1,
+ _Sent1 __last1,
+ _Iter2 __first2,
+ _Sent2 __last2,
+ _Pred& __pred,
+ _Proj1& __proj1,
+ _Proj2& __proj2) {
+ if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
+ auto __n1 = ranges::distance(__first1, __last1);
+ auto __n2 = ranges::distance(__first2, __last2);
+ if (__n2 == 0) {
+ return true;
+ }
+ if (__n2 > __n1) {
+ return false;
+ }
+
+ if constexpr (random_access_iterator<_Iter1> && random_access_iterator<_Iter2>) {
+ return ranges::equal(
+ std::move(__first1),
+ ranges::next(__first1, __n2),
+ std::move(__first2),
+ std::move(__last2),
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2));
+ }
+ }
+
+ return ranges::mismatch(
+ std::move(__first1),
+ std::move(__last1),
+ std::move(__first2),
+ std::move(__last2),
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2))
+ .in2 == __last2;
+ }
+
template <input_iterator _Iter1,
sentinel_for<_Iter1> _Sent1,
input_iterator _Iter2,
@@ -42,23 +86,16 @@ struct __fn {
class _Proj1 = identity,
class _Proj2 = identity>
requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2>
- _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
+ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool operator()(
_Iter1 __first1,
_Sent1 __last1,
_Iter2 __first2,
_Sent2 __last2,
_Pred __pred = {},
_Proj1 __proj1 = {},
- _Proj2 __proj2 = {}) const {
- return __mismatch::__fn::__go(
- std::move(__first1),
- std::move(__last1),
- std::move(__first2),
- std::move(__last2),
- __pred,
- __proj1,
- __proj2)
- .in2 == __last2;
+ _Proj2 __proj2 = {}) {
+ return __starts_with_impl(
+ std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __pred, __proj1, __proj2);
}
template <input_range _Range1,
@@ -67,17 +104,16 @@ struct __fn {
class _Proj1 = identity,
class _Proj2 = identity>
requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
- _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
- _Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
- return __mismatch::__fn::__go(
- ranges::begin(__range1),
- ranges::end(__range1),
- ranges::begin(__range2),
- ranges::end(__range2),
- __pred,
- __proj1,
- __proj2)
- .in2 == ranges::end(__range2);
+ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool
+ operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) {
+ return __starts_with_impl(
+ ranges::begin(__range1),
+ ranges::end(__range1),
+ ranges::begin(__range2),
+ ranges::end(__range2),
+ __pred,
+ __proj1,
+ __proj2);
}
};
} // namespace __starts_with
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 7ddc00650f162f..4565e1415a6e02 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -13,6 +13,7 @@
#include <__config>
#include <__functional/binary_function.h>
#include <__functional/unary_function.h>
+#include <__fwd/functional.h>
#include <__type_traits/integral_constant.h>
#include <__type_traits/operation_traits.h>
#include <__utility/forward.h>
@@ -316,10 +317,18 @@ struct _LIBCPP_TEMPLATE_VIS equal_to<void> {
// comparison when we don't perform an implicit conversion when calling it.
template <class _Tp>
struct __desugars_to<__equal_tag, equal_to<_Tp>, _Tp, _Tp> : true_type {};
+template <class _Tp>
+struct __desugars_to<__equal_tag, reference_wrapper<equal_to<_Tp>>, _Tp, _Tp> : true_type {};
+template <class _Tp>
+struct __desugars_to<__equal_tag, reference_wrapper<const equal_to<_Tp>>, _Tp, _Tp> : true_type {};
// In the transparent case, we do not enforce that
template <class _Tp, class _Up>
struct __desugars_to<__equal_tag, equal_to<void>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<equal_to<void>>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<const equal_to<void>>, _Tp, _Up> : true_type {};
#if _LIBCPP_STD_VER >= 14
template <class _Tp = void>
diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h
index 38b28018049eb7..be400c42a65790 100644
--- a/libcxx/include/__functional/ranges_operations.h
+++ b/libcxx/include/__functional/ranges_operations.h
@@ -13,6 +13,7 @@
#include <__concepts/equality_comparable.h>
#include <__concepts/totally_ordered.h>
#include <__config>
+#include <__fwd/functional.h>
#include <__type_traits/integral_constant.h>
#include <__type_traits/operation_traits.h>
#include <__utility/forward.h>
@@ -99,6 +100,10 @@ struct greater_equal {
// operator are of the same type
template <class _Tp, class _Up>
struct __desugars_to<__equal_tag, ranges::equal_to, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<ranges::equal_to>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<const ranges::equal_to>, _Tp, _Up> : true_type {};
#endif // _LIBCPP_STD_VER >= 20
``````````
</details>
https://github.com/llvm/llvm-project/pull/84570
More information about the libcxx-commits mailing list