[libcxx-commits] [libcxx] [libc++][ranges] optimize the performance of `ranges::starts_with` (PR #84570)

Xiaoyang Liu via libcxx-commits libcxx-commits at lists.llvm.org
Fri Apr 26 08:40:31 PDT 2024


https://github.com/xiaoyang-sde updated https://github.com/llvm/llvm-project/pull/84570

>From e5da9683e87181f74f81e8a6c0bd180e1e223f5f Mon Sep 17 00:00:00 2001
From: Xiaoyang Liu <siujoeng.lau at gmail.com>
Date: Fri, 26 Apr 2024 11:40:14 -0400
Subject: [PATCH] [libc++][ranges] optimize the performance of
 'ranges::starts_with'

---
 libcxx/benchmarks/CMakeLists.txt              |   1 +
 .../algorithms/ranges_starts_with.bench.cpp   | 107 ++++++++++++++++++
 .../include/__algorithm/ranges_starts_with.h  |  69 +++++++++--
 .../ranges.starts_with.pass.cpp               |  12 +-
 4 files changed, 176 insertions(+), 13 deletions(-)
 create mode 100644 libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp

diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 5dc3be0c367e5e..26825883c04662 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -197,6 +197,7 @@ set(BENCHMARK_TESTS
     algorithms/ranges_sort.bench.cpp
     algorithms/ranges_sort_heap.bench.cpp
     algorithms/ranges_stable_sort.bench.cpp
+    algorithms/ranges_starts_with.bench.cpp
     algorithms/sort.bench.cpp
     algorithms/sort_heap.bench.cpp
     algorithms/stable_sort.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
new file mode 100644
index 00000000000000..1c3ac6d02ce140
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/ranges_starts_with.bench.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+#include "test_iterators.h"
+
+static void bm_starts_with_contiguous_iter_with_memcmp_optimization(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1); // haystack: state.range() ints, all equal to 1
+  std::vector<int> p(state.range(), 1); // prefix: same length and contents, so starts_with succeeds
+
+  for (auto _ : state) { // default predicate below; per the benchmark name this targets the memcmp path
+    benchmark::DoNotOptimize(a); // keep the inputs opaque to the optimizer
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = contiguous_iterator(a.data()); // wrapper presumably provided by test_iterators.h
+    auto end1   = contiguous_iterator(a.data() + a.size());
+    auto begin2 = contiguous_iterator(p.data());
+    auto end2   = contiguous_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_contiguous_iter_with_memcmp_optimization)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_contiguous_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1); // haystack: state.range() ints, all equal to 1
+  std::vector<int> p(state.range(), 1); // prefix: same length and contents, so starts_with succeeds
+
+  for (auto _ : state) { // one starts_with call per iteration, with a user-supplied predicate
+    benchmark::DoNotOptimize(a); // keep the inputs opaque to the optimizer
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = contiguous_iterator(a.data()); // wrapper presumably provided by test_iterators.h
+    auto end1   = contiguous_iterator(a.data() + a.size());
+    auto begin2 = contiguous_iterator(p.data());
+    auto end2   = contiguous_iterator(p.data() + p.size());
+
+    // Using a custom predicate to make sure the memcmp optimization doesn't get invoked
+    benchmark::DoNotOptimize(
+        std::ranges::starts_with(begin1, end1, begin2, end2, [](const int x, const int y) { return x == y; }));
+  }
+}
+BENCHMARK(bm_starts_with_contiguous_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_random_access_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1); // haystack: state.range() ints, all equal to 1
+  std::vector<int> p(state.range(), 1); // prefix: same length and contents, so starts_with succeeds
+
+  for (auto _ : state) { // one starts_with call per iteration, default predicate and projections
+    benchmark::DoNotOptimize(a); // keep the inputs opaque to the optimizer
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = random_access_iterator(a.data()); // wrapper presumably provided by test_iterators.h
+    auto end1   = random_access_iterator(a.data() + a.size());
+    auto begin2 = random_access_iterator(p.data());
+    auto end2   = random_access_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_random_access_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_bidirectional_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1); // haystack: state.range() ints, all equal to 1
+  std::vector<int> p(state.range(), 1); // prefix: same length and contents, so starts_with succeeds
+
+  for (auto _ : state) { // one starts_with call per iteration, default predicate and projections
+    benchmark::DoNotOptimize(a); // keep the inputs opaque to the optimizer
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = bidirectional_iterator(a.data()); // wrapper presumably provided by test_iterators.h
+    auto end1   = bidirectional_iterator(a.data() + a.size());
+    auto begin2 = bidirectional_iterator(p.data());
+    auto end2   = bidirectional_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_bidirectional_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+static void bm_starts_with_forward_iter(benchmark::State& state) {
+  std::vector<int> a(state.range(), 1); // haystack: state.range() ints, all equal to 1
+  std::vector<int> p(state.range(), 1); // prefix: same length and contents, so starts_with succeeds
+
+  for (auto _ : state) { // one starts_with call per iteration, default predicate and projections
+    benchmark::DoNotOptimize(a); // keep the inputs opaque to the optimizer
+    benchmark::DoNotOptimize(p);
+
+    auto begin1 = forward_iterator(a.data()); // wrapper presumably provided by test_iterators.h
+    auto end1   = forward_iterator(a.data() + a.size());
+    auto begin2 = forward_iterator(p.data());
+    auto end2   = forward_iterator(p.data() + p.size());
+
+    benchmark::DoNotOptimize(std::ranges::starts_with(begin1, end1, begin2, end2));
+  }
+}
+BENCHMARK(bm_starts_with_forward_iter)->RangeMultiplier(16)->Range(16, 16 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/ranges_starts_with.h b/libcxx/include/__algorithm/ranges_starts_with.h
index 17084e4f24336a..e8bd3e72a861ca 100644
--- a/libcxx/include/__algorithm/ranges_starts_with.h
+++ b/libcxx/include/__algorithm/ranges_starts_with.h
@@ -9,15 +9,19 @@
 #ifndef _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
 #define _LIBCPP___ALGORITHM_RANGES_STARTS_WITH_H
 
-#include <__algorithm/in_in_result.h>
+#include <__algorithm/ranges_equal.h>
 #include <__algorithm/ranges_mismatch.h>
 #include <__config>
 #include <__functional/identity.h>
 #include <__functional/ranges_operations.h>
+#include <__functional/reference_wrapper.h>
 #include <__iterator/concepts.h>
+#include <__iterator/distance.h>
 #include <__iterator/indirectly_comparable.h>
+#include <__iterator/next.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__ranges/size.h>
 #include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -50,14 +54,36 @@ struct __fn {
       _Pred __pred   = {},
       _Proj1 __proj1 = {},
       _Proj2 __proj2 = {}) {
-    return __mismatch::__fn::__go(
+    if constexpr (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>) {
+      auto __n1 = ranges::distance(__first1, __last1);
+      auto __n2 = ranges::distance(__first2, __last2);
+      if (__n2 == 0) {
+        return true;
+      }
+      if (__n2 > __n1) {
+        return false;
+      }
+
+      if constexpr (contiguous_iterator<_Iter1> && contiguous_iterator<_Iter2>) {
+        return ranges::equal(
+            __first1, // copied, not moved: ranges::next below still reads __first1
+            ranges::next(__first1, __n2),
+            std::move(__first2),
+            std::move(__last2),
+            std::ref(__pred),
+            std::ref(__proj1),
+            std::ref(__proj2));
+      }
+    }
+
+    return ranges::mismatch(
                std::move(__first1),
                std::move(__last1),
                std::move(__first2),
-               std::move(__last2),
-               __pred,
-               __proj1,
-               __proj2)
+               __last2,
+               std::ref(__pred),
+               std::ref(__proj1),
+               std::ref(__proj2))
                .in2 == __last2;
   }
 
@@ -69,18 +95,41 @@ struct __fn {
     requires indirectly_comparable<iterator_t<_Range1>, iterator_t<_Range2>, _Pred, _Proj1, _Proj2>
   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool
   operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) {
-    return __mismatch::__fn::__go(
+    if constexpr (sized_range<_Range1> && sized_range<_Range2>) {
+      auto __n1 = ranges::size(__range1);
+      auto __n2 = ranges::size(__range2);
+      if (__n2 == 0) {
+        return true;
+      }
+      if (__n2 > __n1) {
+        return false;
+      }
+
+      if constexpr (contiguous_range<_Range1> && contiguous_range<_Range2>) {
+        return ranges::equal(
+            ranges::begin(__range1),
+            ranges::next(ranges::begin(__range1), __n2),
+            ranges::begin(__range2),
+            ranges::end(__range2),
+            std::ref(__pred),
+            std::ref(__proj1),
+            std::ref(__proj2));
+      }
+    }
+
+    return ranges::mismatch(
                ranges::begin(__range1),
                ranges::end(__range1),
                ranges::begin(__range2),
                ranges::end(__range2),
-               __pred,
-               __proj1,
-               __proj2)
+               std::ref(__pred),
+               std::ref(__proj1),
+               std::ref(__proj2))
                .in2 == ranges::end(__range2);
   }
 };
 } // namespace __starts_with
+
 inline namespace __cpo {
 inline constexpr auto starts_with = __starts_with::__fn{};
 } // namespace __cpo
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
index 0f2284edde81c7..7a8aa4bedb318f 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.starts_with/ranges.starts_with.pass.cpp
@@ -216,12 +216,18 @@ constexpr void test_iterators() {
 constexpr bool test() {
   types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter2>() {
     types::for_each(types::cpp20_input_iterator_list<int*>{}, []<class Iter1>() {
-      if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>)
+      if constexpr (std::forward_iterator<Iter1> && std::forward_iterator<Iter2>) {
         test_iterators<Iter1, Iter1, Iter2, Iter2>();
-      if constexpr (std::forward_iterator<Iter2>)
+      }
+      if constexpr (std::forward_iterator<Iter2>) {
+        test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, Iter2>();
         test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, Iter2>();
-      if constexpr (std::forward_iterator<Iter1>)
+      }
+      if constexpr (std::forward_iterator<Iter1>) {
+        test_iterators<Iter1, Iter1, Iter2, sentinel_wrapper<Iter2>>();
         test_iterators<Iter1, Iter1, Iter2, sized_sentinel<Iter2>>();
+      }
+      test_iterators<Iter1, sentinel_wrapper<Iter1>, Iter2, sentinel_wrapper<Iter2>>();
       test_iterators<Iter1, sized_sentinel<Iter1>, Iter2, sized_sentinel<Iter2>>();
     });
   });



More information about the libcxx-commits mailing list