[libcxx-commits] [libcxx] [libc++][ranges] optimize the performance of `ranges::starts_with` (PR #84570)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat Mar 9 00:15:11 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libcxx

Author: Xiaoyang Liu (xiaoyang-sde)

<details>
<summary>Changes</summary>

## Abstract

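This pull request optimizes `ranges::starts_with`. When both ranges have sized sentinels, it now returns early if the second range is empty or is longer than the first; when both iterators are also random-access, it compares only the first `n2` elements with `ranges::equal` instead of calling `ranges::mismatch`, which appears intended to let contiguous ranges of trivially comparable elements use the `memcmp` fast path (see the `with_memcmp_optimization` benchmark below).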

This pull request is related to issue #83880.

## Benchmarking

```txt
2024-03-08T23:40:18-08:00
Running ./ranges_starts_with.libcxx.out
Run on (12 X 2496 MHz CPU s)
CPU Caches:
  L1 Data 48 KiB (x6)
  L1 Instruction 32 KiB (x6)
  L2 Unified 1280 KiB (x6)
  L3 Unified 18432 KiB (x1)
Load Average: 0.79, 1.93, 2.27
-----------------------------------------------------------------------------------------------------------
Benchmark                                                                 Time             CPU   Iterations
-----------------------------------------------------------------------------------------------------------
bm_starts_with_contiguous_iter_with_memcmp_optimization/16             1.50 ns         1.50 ns    460164239
bm_starts_with_contiguous_iter_with_memcmp_optimization/256            12.5 ns         12.5 ns     55148246
bm_starts_with_contiguous_iter_with_memcmp_optimization/4096            189 ns          189 ns      4030953
bm_starts_with_contiguous_iter_with_memcmp_optimization/65536          4889 ns         4889 ns       157791
bm_starts_with_contiguous_iter_with_memcmp_optimization/1048576      138974 ns       138964 ns         5810
bm_starts_with_contiguous_iter_with_memcmp_optimization/16777216    3874632 ns      3874633 ns          180
bm_starts_with_contiguous_iter/16                                      6.55 ns         6.55 ns     90161221
bm_starts_with_contiguous_iter/256                                     99.8 ns         99.8 ns      7093729
bm_starts_with_contiguous_iter/4096                                    1505 ns         1488 ns       420866
bm_starts_with_contiguous_iter/65536                                  24836 ns        24836 ns        29657
bm_starts_with_contiguous_iter/1048576                               400852 ns       400847 ns         1700
bm_starts_with_contiguous_iter/16777216                             7017180 ns      7017118 ns          101
bm_starts_with_random_access_iter/16                                   6.86 ns         6.86 ns     99695785
bm_starts_with_random_access_iter/256                                   102 ns          102 ns      6721592
bm_starts_with_random_access_iter/4096                                 1537 ns         1537 ns       478604
bm_starts_with_random_access_iter/65536                               23564 ns        23564 ns        30089
bm_starts_with_random_access_iter/1048576                            382977 ns       382985 ns         1850
bm_starts_with_random_access_iter/16777216                          6952078 ns      6952032 ns           99
bm_starts_with_bidirectional_iter/16                                   6.23 ns         6.23 ns    111062808
bm_starts_with_bidirectional_iter/256                                   103 ns          103 ns      6981875
bm_starts_with_bidirectional_iter/4096                                 1498 ns         1468 ns       451407
bm_starts_with_bidirectional_iter/65536                               23994 ns        23994 ns        30011
bm_starts_with_bidirectional_iter/1048576                            381783 ns       381781 ns         1838
bm_starts_with_bidirectional_iter/16777216                          7538205 ns      7538213 ns          103
bm_starts_with_forward_iter/16                                         6.37 ns         6.37 ns    103694333
bm_starts_with_forward_iter/256                                         103 ns          103 ns      7017987
bm_starts_with_forward_iter/4096                                       1535 ns         1535 ns       483272
bm_starts_with_forward_iter/65536                                     23208 ns        23208 ns        28449
bm_starts_with_forward_iter/1048576                                  370772 ns       370773 ns         1812
bm_starts_with_forward_iter/16777216                                6758327 ns      6758279 ns          101
```

---
Full diff: https://github.com/llvm/llvm-project/pull/84570.diff


5 Files Affected:

- (modified) libcxx/benchmarks/CMakeLists.txt (+1) 
- (added) libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp (+107) 
- (modified) libcxx/include/__algorithm/ranges_starts_with.h (+59-23) 
- (modified) libcxx/include/__functional/operations.h (+9) 
- (modified) libcxx/include/__functional/ranges_operations.h (+5) 


``````````diff
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index b436e96f178b70..621b295a92c8d2 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -186,6 +186,7 @@ set(BENCHMARK_TESTS
     algorithms/pstl.stable_sort.bench.cpp
     algorithms/push_heap.bench.cpp
     algorithms/ranges_contains.bench.cpp
+    algorithms/ranges_starts_with.bench.cpp
     algorithms/ranges_ends_with.bench.cpp
     algorithms/ranges_make_heap.bench.cpp
     algorithms/ranges_make_heap_then_sort_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
new file mode 100644
index 00000000000000..9986fdb1684fc7
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <functional>
+#include <vector>
+
+#include "test_iterators.h"
+
+static void bm_starts_with_contiguous_iter_with_memcmp_optimization(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1);
+  std::vector<int> p(state.range(), 1);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = contiguous_iterator(a.data());
+    auto end1   = contiguous_iterator(a.data() + a.size());
+    auto begin2 = contiguous_iterator(p.data());
+    auto end2   = contiguous_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_contiguous_iter_with_memcmp_optimization)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_contiguous_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1);
+  std::vector<int> p(state.range(), 1);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = contiguous_iterator(a.data());
+    auto end1   = contiguous_iterator(a.data() + a.size());
+    auto begin2 = contiguous_iterator(p.data());
+    auto end2   = contiguous_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(
+        std::ranges::starts_with(begin1, end1, begin2, end2, [](const int a, const int b) { return a == b; }));
+  }
+}
+BENCHMARK(bm_starts_with_contiguous_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_random_access_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1);
+  std::vector<int> p(state.range(), 1);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = random_access_iterator(a.data());
+    auto end1   = random_access_iterator(a.data() + a.size());
+    auto begin2 = random_access_iterator(p.data());
+    auto end2   = random_access_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_random_access_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_bidirectional_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1);
+  std::vector<int> p(state.range(), 1);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = bidirectional_iterator(a.data());
+    auto end1   = bidirectional_iterator(a.data() + a.size());
+    auto begin2 = bidirectional_iterator(p.data());
+    auto end2   = bidirectional_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_bidirectional_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_forward_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1);
+  std::vector<int> p(state.range(), 1);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = forward_iterator(a.data());
+    auto end1   = forward_iterator(a.data() + a.size());
+    auto begin2 = forward_iterator(p.data());
+    auto end2   = forward_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_forward_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/ranges_starts_with.h b/libcxx/include/__algorithm/ranges_starts_with.h
index 90e184aa9bccc2..ebcc15555f5544 100644
--- a/libcxx/include/__algorithm/ranges_starts_with.h
+++ b/libcxx/include/__algorithm/ranges_starts_with.h
@@ -9,12 +9,14 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
 #define _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
 
-#include <__algorithm/in_in_result.h>
+#include <__algorithm/ranges_equal.h>
 #include <__algorithm/ranges_mismatch.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__functional/ranges_operations.h>
+#include <__functional/reference_wrapper.h>
 #include <__iterator/concepts.h>
+#include <__iterator/distance.h>
 #include <__iterator/indirectly_comparable.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
@@ -34,6 +36,48 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 namespace ranges {
 namespace __starts_with {
 struct __fn {
+  template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool __starts_with_impl(
+      _Iter1 __first1,
+      _Sent1 __last1,
+      _Iter2 __first2,
+      _Sent2 __last2,
+      _Pred& __pred,
+      _Proj1& __proj1,
+      _Proj2& __proj2) {
+    if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
+      auto __n1 = ranges::distance(__first1, __last1);
+      auto __n2 = ranges::distance(__first2, __last2);
+      if (__n2 == 0) {
+        return true;
+      }
+      if (__n2 > __n1) {
+        return false;
+      }
+
+      if constexpr (random_access_iterator<_Iter1> && random_access_iterator<_Iter2>) {
+        return ranges::equal(
+            std::move(__first1),
+            ranges::next(__first1, __n2),
+            std::move(__first2),
+            std::move(__last2),
+            std::ref(__pred),
+            std::ref(__proj1),
+            std::ref(__proj2));
+      }
+    }
+
+    return ranges::mismatch(
+               std::move(__first1),
+               std::move(__last1),
+               std::move(__first2),
+               std::move(__last2),
+               std::ref(__pred),
+               std::ref(__proj1),
+               std::ref(__proj2))
+               .in2 == __last2;
+  }
+
   template <input_iterator _Iter1,
             sentinel_for<_Iter1> _Sent1,
             input_iterator _Iter2,
@@ -42,23 +86,16 @@ struct __fn {
             class _Proj1 = identity,
             class _Proj2 = identity>
     requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2>
-  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool operator()(
       _Iter1 __first1,
       _Sent1 __last1,
       _Iter2 __first2,
       _Sent2 __last2,
       _Pred __pred   = {},
       _Proj1 __proj1 = {},
-      _Proj2 __proj2 = {}) const {
-    return __mismatch::__fn::__go(
-               std::move(__first1),
-               std::move(__last1),
-               std::move(__first2),
-               std::move(__last2),
-               __pred,
-               __proj1,
-               __proj2)
-               .in2 == __last2;
+      _Proj2 __proj2 = {}) {
+    return __starts_with_impl(
+        std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __pred, __proj1, __proj2);
   }
 
   template <input_range _Range1,
@@ -67,17 +104,16 @@ struct __fn {
             class _Proj1 = identity,
             class _Proj2 = identity>
     requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
-  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
-      _Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
-    return __mismatch::__fn::__go(
-               ranges::begin(__range1),
-               ranges::end(__range1),
-               ranges::begin(__range2),
-               ranges::end(__range2),
-               __pred,
-               __proj1,
-               __proj2)
-               .in2 == ranges::end(__range2);
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr bool
+  operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) {
+    return __starts_with_impl(
+        ranges::begin(__range1),
+        ranges::end(__range1),
+        ranges::begin(__range2),
+        ranges::end(__range2),
+        __pred,
+        __proj1,
+        __proj2);
   }
 };
 } // namespace __starts_with
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 7ddc00650f162f..4565e1415a6e02 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -13,6 +13,7 @@
 #include <__config>
 #include <__functional/binary_function.h>
 #include <__functional/unary_function.h>
+#include <__fwd/functional.h>
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/operation_traits.h>
 #include <__utility/forward.h>
@@ -316,10 +317,18 @@ struct _LIBCPP_TEMPLATE_VIS equal_to<void> {
 // comparison when we don't perform an implicit conversion when calling it.
 template <class _Tp>
 struct __desugars_to<__equal_tag, equal_to<_Tp>, _Tp, _Tp> : true_type {};
+template <class _Tp>
+struct __desugars_to<__equal_tag, reference_wrapper<equal_to<_Tp>>, _Tp, _Tp> : true_type {};
+template <class _Tp>
+struct __desugars_to<__equal_tag, reference_wrapper<const equal_to<_Tp>>, _Tp, _Tp> : true_type {};
 
 // In the transparent case, we do not enforce that
 template <class _Tp, class _Up>
 struct __desugars_to<__equal_tag, equal_to<void>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<equal_to<void>>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<const equal_to<void>>, _Tp, _Up> : true_type {};
 
 #if _LIBCPP_STD_VER >= 14
 template <class _Tp = void>
diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h
index 38b28018049eb7..be400c42a65790 100644
--- a/libcxx/include/__functional/ranges_operations.h
+++ b/libcxx/include/__functional/ranges_operations.h
@@ -13,6 +13,7 @@
 #include <__concepts/equality_comparable.h>
 #include <__concepts/totally_ordered.h>
 #include <__config>
+#include <__fwd/functional.h>
 #include <__type_traits/integral_constant.h>
 #include <__type_traits/operation_traits.h>
 #include <__utility/forward.h>
@@ -99,6 +100,10 @@ struct greater_equal {
 // operator are of the same type
 template <class _Tp, class _Up>
 struct __desugars_to<__equal_tag, ranges::equal_to, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<ranges::equal_to>, _Tp, _Up> : true_type {};
+template <class _Tp, class _Up>
+struct __desugars_to<__equal_tag, reference_wrapper<const ranges::equal_to>, _Tp, _Up> : true_type {};
 
 #endif // _LIBCPP_STD_VER >= 20
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/84570


More information about the libcxx-commits mailing list