[libcxx-commits] [libcxx] [libc++][ranges] optimize the performance of `ranges::starts_with` (PR #84570)
Xiaoyang Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Oct 16 21:17:54 PDT 2024
https://github.com/xiaoyang-sde updated https://github.com/llvm/llvm-project/pull/84570
>From e5da9683e87181f74f81e8a6c0bd180e1e223f5f Mon Sep 17 00:00:00 2001
From: Xiaoyang Liu <siujoeng.lau at gmail.com>
Date: Fri, 26 Apr 2024 11:40:14 -0400
Subject: [PATCH 1/2] [libc++][ranges] optimize the performance of
'ranges::starts_with'
---
libcxx/benchmarks/CMakeLists.txt | 1 +
.../algorithms/ranges_starts_with.bench.cpp | 107 ++++++++++++++++++
.../include/__algorithm/ranges_starts_with.h | 69 +++++++++--
.../ranges.starts_with.pass.cpp | 12 +-
4 files changed, 176 insertions(+), 13 deletions(-)
create mode 100644 libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 5dc3be0c367e5e..26825883c04662 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -197,6 +197,7 @@ set(BENCHMARK_TESTS
algorithms/ranges_sort.bench.cpp
algorithms/ranges_sort_heap.bench.cpp
algorithms/ranges_stable_sort.bench.cpp
+ algorithms/ranges_starts_with.bench.cpp
algorithms/sort.bench.cpp
algorithms/sort_heap.bench.cpp
algorithms/stable_sort.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
new file mode 100644
index 00000000000000..1c3ac6d02ce140
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+#include "test_iterators.h"
+
+// Times std::ranges::starts_with on two equally sized vectors filled with 1, both
+// viewed through contiguous_iterator and compared with the default predicate and
+// projections. Per the benchmark's name, this is the configuration expected to
+// reach the memcmp fast path.
+static void bm_starts_with_contiguous_iter_with_memcmp_optimization(benchmark::State& state) {
+  const auto size = state.range();
+  std::vector<int> haystack(size, 1);
+  std::vector<int> prefix(size, 1);
+
+  for (auto _ : state) {
+    // Hand both inputs to DoNotOptimize on every iteration, before they are read.
+    benchmark::DoNotOptimize(haystack);
+    benchmark::DoNotOptimize(prefix);
+
+    auto haystack_first = contiguous_iterator(haystack.data());
+    auto haystack_last  = contiguous_iterator(haystack.data() + haystack.size());
+    auto prefix_first   = contiguous_iterator(prefix.data());
+    auto prefix_last    = contiguous_iterator(prefix.data() + prefix.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(haystack_first, haystack_last, prefix_first, prefix_last));
+  }
+}
+// Input sizes run from 16 to 16 << 20 elements, growing by a factor of 16.
+BENCHMARK(bm_starts_with_contiguous_iter_with_memcmp_optimization)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+// Times std::ranges::starts_with on two equally sized vectors filled with 1, both
+// viewed through contiguous_iterator, but with a caller-supplied equality predicate
+// instead of the default one.
+static void bm_starts_with_contiguous_iter(benchmark::State& state) {
+  const auto size = state.range();
+  std::vector<int> haystack(size, 1);
+  std::vector<int> prefix(size, 1);
+
+  for (auto _ : state) {
+    // Hand both inputs to DoNotOptimize on every iteration, before they are read.
+    benchmark::DoNotOptimize(haystack);
+    benchmark::DoNotOptimize(prefix);
+
+    auto haystack_first = contiguous_iterator(haystack.data());
+    auto haystack_last  = contiguous_iterator(haystack.data() + haystack.size());
+    auto prefix_first   = contiguous_iterator(prefix.data());
+    auto prefix_last    = contiguous_iterator(prefix.data() + prefix.size());
+
+    // Using a custom predicate to make sure the memcmp optimization doesn't get invoked
+    auto same_value = [](const int lhs, const int rhs) { return lhs == rhs; };
+    benchmark::DoNotOptimize(
+        std::ranges::starts_with(haystack_first, haystack_last, prefix_first, prefix_last, same_value));
+  }
+}
+// Input sizes run from 16 to 16 << 20 elements, growing by a factor of 16.
+BENCHMARK(bm_starts_with_contiguous_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+// Times std::ranges::starts_with on two equally sized vectors filled with 1, both
+// viewed through random_access_iterator, with the default predicate and projections.
+static void bm_starts_with_random_access_iter(benchmark::State& state) {
+  const auto size = state.range();
+  std::vector<int> haystack(size, 1);
+  std::vector<int> prefix(size, 1);
+
+  for (auto _ : state) {
+    // Hand both inputs to DoNotOptimize on every iteration, before they are read.
+    benchmark::DoNotOptimize(haystack);
+    benchmark::DoNotOptimize(prefix);
+
+    auto haystack_first = random_access_iterator(haystack.data());
+    auto haystack_last  = random_access_iterator(haystack.data() + haystack.size());
+    auto prefix_first   = random_access_iterator(prefix.data());
+    auto prefix_last    = random_access_iterator(prefix.data() + prefix.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(haystack_first, haystack_last, prefix_first, prefix_last));
+  }
+}
+// Input sizes run from 16 to 16 << 20 elements, growing by a factor of 16.
+BENCHMARK(bm_starts_with_random_access_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+// Times std::ranges::starts_with on two equally sized vectors filled with 1, both
+// viewed through bidirectional_iterator, with the default predicate and projections.
+static void bm_starts_with_bidirectional_iter(benchmark::State& state) {
+  const auto size = state.range();
+  std::vector<int> haystack(size, 1);
+  std::vector<int> prefix(size, 1);
+
+  for (auto _ : state) {
+    // Hand both inputs to DoNotOptimize on every iteration, before they are read.
+    benchmark::DoNotOptimize(haystack);
+    benchmark::DoNotOptimize(prefix);
+
+    auto haystack_first = bidirectional_iterator(haystack.data());
+    auto haystack_last  = bidirectional_iterator(haystack.data() + haystack.size());
+    auto prefix_first   = bidirectional_iterator(prefix.data());
+    auto prefix_last    = bidirectional_iterator(prefix.data() + prefix.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(haystack_first, haystack_last, prefix_first, prefix_last));
+  }
+}
+// Input sizes run from 16 to 16 << 20 elements, growing by a factor of 16.
+BENCHMARK(bm_starts_with_bidirectional_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+// Times std::ranges::starts_with on two equally sized vectors filled with 1, both
+// viewed through forward_iterator, with the default predicate and projections.
+static void bm_starts_with_forward_iter(benchmark::State& state) {
+  const auto size = state.range();
+  std::vector<int> haystack(size, 1);
+  std::vector<int> prefix(size, 1);
+
+  for (auto _ : state) {
+    // Hand both inputs to DoNotOptimize on every iteration, before they are read.
+    benchmark::DoNotOptimize(haystack);
+    benchmark::DoNotOptimize(prefix);
+
+    auto haystack_first = forward_iterator(haystack.data());
+    auto haystack_last  = forward_iterator(haystack.data() + haystack.size());
+    auto prefix_first   = forward_iterator(prefix.data());
+    auto prefix_last    = forward_iterator(prefix.data() + prefix.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(haystack_first, haystack_last, prefix_first, prefix_last));
+  }
+}
+// Input sizes run from 16 to 16 << 20 elements, growing by a factor of 16.
+BENCHMARK(bm_starts_with_forward_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+BENCHMARK_MAIN(); // Google Benchmark macro supplying the program entry point for the benchmarks registered above.
diff --git a/libcxx/include/__algorithm/ranges_starts_with.h b/libcxx/include/__algorithm/ranges_starts_with.h
index 17084e4f24336a..e8bd3e72a861ca 100644
--- a/libcxx/include/__algorithm/ranges_starts_with.h
+++ b/libcxx/include/__algorithm/ranges_starts_with.h
@@ -9,15 +9,19 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
#define _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
-#include <__algorithm/in_in_result.h>
+#include <__algorithm/ranges_equal.h>
#include <__algorithm/ranges_mismatch.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/ranges_operations.h>
+#include <__functional/reference_wrapper.h>
#include <__iterator/concepts.h>
+#include <__iterator/distance.h>
#include <__iterator/indirectly_comparable.h>
+#include <__iterator/next.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__ranges/size.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -50,14 +54,36 @@ struct __fn {
_Pred __pred = {},
_Proj1 __proj1 = {},
_Proj2 __proj2 = {}) {
- return __mismatch::__fn::__go(
+ if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
+ auto __n1 = ranges::distance(__first1, __last1);
+ auto __n2 = ranges::distance(__first2, __last2);
+ if (__n2 == 0) {
+ return true;
+ }
+ if (__n2 > __n1) {
+ return false;
+ }
+
+ if constexpr (contiguous_iterator<_Iter1> && contiguous_iterator<_Iter2>) {
+ return ranges::equal(
+ std::move(__first1),
+ ranges::next(__first1, __n2),
+ std::move(__first2),
+ std::move(__last2),
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2));
+ }
+ }
+
+ return ranges::mismatch(
std::move(__first1),
std::move(__last1),
std::move(__first2),
- std::move(__last2),
- __pred,
- __proj1,
- __proj2)
+ __last2,
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2))
.in2 == __last2;
}
@@ -69,18 +95,41 @@ struct __fn {
requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool
operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) {
- return __mismatch::__fn::__go(
+ if constexpr (sized_range<_Range1> && sized_range<_Range2>) {
+ auto __n1 = ranges::size(__range1);
+ auto __n2 = ranges::size(__range2);
+ if (__n2 == 0) {
+ return true;
+ }
+ if (__n2 > __n1) {
+ return false;
+ }
+
+ if constexpr (contiguous_range<_Range1> && contiguous_range<_Range2>) {
+ return ranges::equal(
+ ranges::begin(__range1),
+ ranges::next(ranges::begin(__range1), __n2),
+ ranges::begin(__range2),
+ ranges::end(__range2),
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2));
+ }
+ }
+
+ return ranges::mismatch(
ranges::begin(__range1),
ranges::end(__range1),
ranges::begin(__range2),
ranges::end(__range2),
- __pred,
- __proj1,
- __proj2)
+ std::ref(__pred),
+ std::ref(__proj1),
+ std::ref(__proj2))
.in2 == ranges::end(__range2);
}
};
} // namespace __starts_with
+
inline namespace __cpo {
inline constexpr auto starts_with = __starts_with::__fn{};
} // namespace __cpo
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
index 0f2284edde81c7..7a8aa4bedb318f 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
@@ -216,12 +216,18 @@ constexpr void test_iterators() {
constexpr bool test() {
types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter2>() {
types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter1>() {
- if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>)
+ if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>) {
test_iterators<Iter1, Iter1, Iter2, Iter2>();
- if constexpr (std::forward_iterator<Iter2>)
+ }
+ if constexpr (std::forward_iterator<Iter2>) {
+ test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, Iter2>();
test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, Iter2>();
- if constexpr (std::forward_iterator<Iter1>)
+ }
+ if constexpr (std::forward_iterator<Iter1>) {
+ test_iterators<Iter1, Iter1, Iter2, sentinel_wrapper<Iter2>>();
test_iterators<Iter1, Iter1, Iter2, sized_sentinel<Iter2>>();
+ }
+ test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, sentinel_wrapper<Iter2>>();
test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, sized_sentinel<Iter2>>();
});
});
>From 7c5f3913e0efaf8d2095e4d954cb01671fe61928 Mon Sep 17 00:00:00 2001
From: Xiaoyang Liu <siujoeng.lau at gmail.com>
Date: Thu, 17 Oct 2024 00:17:37 -0400
Subject: [PATCH 2/2] [libc++][ranges] optimize the performance of
'ranges::starts_with'
---
.../alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
index 7a8aa4bedb318f..cfc2e37d596f8f 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
@@ -227,7 +227,6 @@ constexpr bool test() {
test_iterators<Iter1, Iter1, Iter2, sentinel_wrapper<Iter2>>();
test_iterators<Iter1, Iter1, Iter2, sized_sentinel<Iter2>>();
}
- test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, sentinel_wrapper<Iter2>>();
test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, sized_sentinel<Iter2>>();
});
});
More information about the libcxx-commits
mailing list