[libcxx-commits] [libcxx] [libc++] Optimize lexicographical_compare (PR #65279)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Sep 4 11:06:18 PDT 2023
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/65279:
Benchmarks:
```
---------------------------------------------------------------------------------------
Benchmark old new
---------------------------------------------------------------------------------------
bm_lexicographical_compare<unsigned char>/1 0.950 ns 1.06 ns
bm_lexicographical_compare<unsigned char>/2 1.51 ns 1.61 ns
bm_lexicographical_compare<unsigned char>/3 2.00 ns 1.62 ns
bm_lexicographical_compare<unsigned char>/4 2.65 ns 1.58 ns
bm_lexicographical_compare<unsigned char>/5 3.64 ns 1.66 ns
bm_lexicographical_compare<unsigned char>/6 3.83 ns 1.58 ns
bm_lexicographical_compare<unsigned char>/7 4.23 ns 1.92 ns
bm_lexicographical_compare<unsigned char>/8 4.77 ns 1.59 ns
bm_lexicographical_compare<unsigned char>/16 9.29 ns 1.58 ns
bm_lexicographical_compare<unsigned char>/64 41.5 ns 1.52 ns
bm_lexicographical_compare<unsigned char>/512 282 ns 7.91 ns
bm_lexicographical_compare<unsigned char>/4096 2175 ns 54.2 ns
bm_lexicographical_compare<unsigned char>/32768 17325 ns 663 ns
bm_lexicographical_compare<unsigned char>/262144 138502 ns 5520 ns
bm_lexicographical_compare<unsigned char>/1048576 554088 ns 29796 ns
bm_lexicographical_compare<int>/1 0.969 ns 0.966 ns
bm_lexicographical_compare<int>/2 1.45 ns 1.45 ns
bm_lexicographical_compare<int>/3 1.99 ns 1.99 ns
bm_lexicographical_compare<int>/4 2.66 ns 2.64 ns
bm_lexicographical_compare<int>/5 3.17 ns 3.17 ns
bm_lexicographical_compare<int>/6 3.70 ns 3.69 ns
bm_lexicographical_compare<int>/7 4.23 ns 4.23 ns
bm_lexicographical_compare<int>/8 4.79 ns 4.76 ns
bm_lexicographical_compare<int>/16 8.99 ns 8.94 ns
bm_lexicographical_compare<int>/64 43.7 ns 39.7 ns
bm_lexicographical_compare<int>/512 280 ns 280 ns
bm_lexicographical_compare<int>/4096 2173 ns 2173 ns
bm_lexicographical_compare<int>/32768 17331 ns 17314 ns
bm_lexicographical_compare<int>/262144 138655 ns 138517 ns
bm_lexicographical_compare<int>/1048576 569757 ns 566695 ns
bm_ranges_lexicographical_compare<unsigned char>/1 0.957 ns 1.06 ns
bm_ranges_lexicographical_compare<unsigned char>/2 1.46 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/3 2.00 ns 1.59 ns
bm_ranges_lexicographical_compare<unsigned char>/4 2.65 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/5 3.29 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/6 3.70 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/7 4.23 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/8 4.92 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/16 9.21 ns 1.58 ns
bm_ranges_lexicographical_compare<unsigned char>/64 43.0 ns 1.52 ns
bm_ranges_lexicographical_compare<unsigned char>/512 286 ns 7.91 ns
bm_ranges_lexicographical_compare<unsigned char>/4096 2182 ns 54.2 ns
bm_ranges_lexicographical_compare<unsigned char>/32768 17403 ns 664 ns
bm_ranges_lexicographical_compare<unsigned char>/262144 138635 ns 5419 ns
bm_ranges_lexicographical_compare<unsigned char>/1048576 554722 ns 29147 ns
bm_ranges_lexicographical_compare<int>/1 0.968 ns 0.967 ns
bm_ranges_lexicographical_compare<int>/2 1.46 ns 1.46 ns
bm_ranges_lexicographical_compare<int>/3 2.03 ns 1.98 ns
bm_ranges_lexicographical_compare<int>/4 2.67 ns 2.87 ns
bm_ranges_lexicographical_compare<int>/5 3.21 ns 3.48 ns
bm_ranges_lexicographical_compare<int>/6 3.74 ns 3.88 ns
bm_ranges_lexicographical_compare<int>/7 4.26 ns 4.22 ns
bm_ranges_lexicographical_compare<int>/8 4.78 ns 4.78 ns
bm_ranges_lexicographical_compare<int>/16 9.17 ns 9.28 ns
bm_ranges_lexicographical_compare<int>/64 37.8 ns 35.8 ns
bm_ranges_lexicographical_compare<int>/512 296 ns 282 ns
bm_ranges_lexicographical_compare<int>/4096 2246 ns 2175 ns
bm_ranges_lexicographical_compare<int>/32768 17442 ns 17312 ns
bm_ranges_lexicographical_compare<int>/262144 138644 ns 138462 ns
bm_ranges_lexicographical_compare<int>/1048576 565530 ns 564599 ns
```
From 38d29d9f5feded65edf66a015cfe3b9c6a411e25 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 4 Sep 2023 10:46:58 -0700
Subject: [PATCH] [libc++] Optimize lexicographical_compare
---
libcxx/benchmarks/CMakeLists.txt | 1 +
.../lexicographical_compare.bench.cpp | 41 ++++++++++
libcxx/include/__algorithm/comp.h | 3 +
.../__algorithm/lexicographical_compare.h | 77 ++++++++++++-------
.../ranges_lexicographical_compare.h | 27 ++++---
libcxx/include/__functional/operations.h | 8 ++
.../include/__functional/ranges_operations.h | 3 +
.../include/__type_traits/predicate_traits.h | 3 +
8 files changed, 125 insertions(+), 38 deletions(-)
create mode 100644 libcxx/benchmarks/algorithms/lexicographical_compare.bench.cpp
diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt
index 92f295ef399008..fad1cb4dbe75de 100644
--- a/libcxx/benchmarks/CMakeLists.txt
+++ b/libcxx/benchmarks/CMakeLists.txt
@@ -155,6 +155,7 @@ set(BENCHMARK_TESTS
algorithms.partition_point.bench.cpp
algorithms/equal.bench.cpp
algorithms/find.bench.cpp
+ algorithms/lexicographical_compare.bench.cpp
algorithms/lower_bound.bench.cpp
algorithms/make_heap.bench.cpp
algorithms/make_heap_then_sort_heap.bench.cpp
diff --git a/libcxx/benchmarks/algorithms/lexicographical_compare.bench.cpp b/libcxx/benchmarks/algorithms/lexicographical_compare.bench.cpp
new file mode 100644
index 00000000000000..bbbbbd3dbff4de
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/lexicographical_compare.bench.cpp
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <vector>
+
+template <class T>
+static void bm_lexicographical_compare(benchmark::State& state) {
+ std::vector<T> vec1(state.range(), '1');
+ std::vector<T> vec2(state.range(), '1');
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(vec1);
+ benchmark::DoNotOptimize(vec2);
+ benchmark::DoNotOptimize(std::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end()));
+ }
+}
+BENCHMARK(bm_lexicographical_compare<unsigned char>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_lexicographical_compare<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+template <class T>
+static void bm_ranges_lexicographical_compare(benchmark::State& state) {
+ std::vector<T> vec1(state.range(), '1');
+ std::vector<T> vec2(state.range(), '1');
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(vec1);
+ benchmark::DoNotOptimize(vec2);
+ benchmark::DoNotOptimize(std::ranges::lexicographical_compare(vec1.begin(), vec1.end(), vec2.begin(), vec2.end()));
+ }
+}
+BENCHMARK(bm_ranges_lexicographical_compare<unsigned char>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_ranges_lexicographical_compare<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h
index 9474536615ffb6..b80e2056c8f90f 100644
--- a/libcxx/include/__algorithm/comp.h
+++ b/libcxx/include/__algorithm/comp.h
@@ -42,6 +42,9 @@ struct __less<void, void> {
}
};
+template <class _Lhs, class _Rhs>
+struct __is_trivial_less_than_predicate<__less<>, _Lhs, _Rhs> : true_type {};
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_COMP_H
diff --git a/libcxx/include/__algorithm/lexicographical_compare.h b/libcxx/include/__algorithm/lexicographical_compare.h
index 62b72edc80f773..a806e4f62b5b57 100644
--- a/libcxx/include/__algorithm/lexicographical_compare.h
+++ b/libcxx/include/__algorithm/lexicographical_compare.h
@@ -11,50 +11,75 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
+#include <__algorithm/min.h>
+#include <__algorithm/unwrap_iter.h>
#include <__config>
+#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
+#include <__string/constexpr_c_functions.h>
+#include <__type_traits/invoke.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _Compare, class _InputIterator1, class _InputIterator2>
+template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Proj1, class _Proj2, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __lexicographical_compare(
+ _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) {
+ while (__first2 != __last2) {
+ if (__first1 == __last1 ||
+ std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
+ return true;
+ if (std::__invoke(__comp, std::__invoke(__proj2, *__first2), std::__invoke(__proj1, *__first1)))
+ return false;
+ ++__first1;
+ ++__first2;
+ }
+ return false;
+}
+
+#if _LIBCPP_STD_VER >= 20 // memcmp fast path: raw pointers, identity projections, trivial less-than predicate
+template <class _Tp, class _Up, class _Proj1, class _Proj2, class _Comp>
+ requires(__libcpp_is_trivially_lexicographically_comparable<_Tp, _Up>::value && __is_identity<_Proj1>::value &&
+ __is_identity<_Proj2>::value && __is_trivial_less_than_predicate<_Comp, _Tp, _Up>::value)
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
-__lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _InputIterator2 __last2, _Compare __comp)
-{
- for (; __first2 != __last2; ++__first1, (void) ++__first2)
- {
- if (__first1 == __last1 || __comp(*__first1, *__first2))
- return true;
- if (__comp(*__first2, *__first1))
- return false;
- }
- return false;
+__lexicographical_compare(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up* __last2, _Comp&, _Proj1&, _Proj2&) {
+ int __res = std::__constexpr_memcmp(__first1, __first2, __element_count(std::min(__last1 - __first1, __last2 - __first2))); // orders the common prefix only
+ return __res != 0 ? __res < 0 : __last1 - __first1 < __last2 - __first2; // equal prefix: the shorter range compares less, matching the generic loop
}
+#endif
template <class _InputIterator1, class _InputIterator2, class _Compare>
-_LIBCPP_NODISCARD_EXT inline
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20
-bool
-lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _InputIterator2 __last2, _Compare __comp)
-{
- return _VSTD::__lexicographical_compare<__comp_ref_type<_Compare> >(__first1, __last1, __first2, __last2, __comp);
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
+ _InputIterator1 __first1,
+ _InputIterator1 __last1,
+ _InputIterator2 __first2,
+ _InputIterator2 __last2,
+ _Compare __comp) {
+ __identity __proj;
+ return std::__lexicographical_compare(
+ std::__unwrap_iter(__first1),
+ std::__unwrap_iter(__last1),
+ std::__unwrap_iter(__first2),
+ std::__unwrap_iter(__last2),
+ __comp,
+ __proj,
+ __proj);
}
template <class _InputIterator1, class _InputIterator2>
-_LIBCPP_NODISCARD_EXT inline
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20
-bool
-lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _InputIterator2 __last2)
-{
- return _VSTD::lexicographical_compare(__first1, __last1, __first2, __last2, __less<>());
+_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare(
+ _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
+ return std::lexicographical_compare(__first1, __last1, __first2, __last2, __less<>());
}
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H
diff --git a/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/libcxx/include/__algorithm/ranges_lexicographical_compare.h
index 5b843dfd7b3139..b0b84160833369 100644
--- a/libcxx/include/__algorithm/ranges_lexicographical_compare.h
+++ b/libcxx/include/__algorithm/ranges_lexicographical_compare.h
@@ -9,6 +9,8 @@
#ifndef _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H
#define _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H
+#include <__algorithm/lexicographical_compare.h>
+#include <__algorithm/unwrap_range.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
@@ -31,7 +33,7 @@ namespace ranges {
namespace __lexicographical_compare {
struct __fn {
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Proj1, class _Proj2, class _Comp>
- _LIBCPP_HIDE_FROM_ABI constexpr static bool __lexicographical_compare_impl(
+ static _LIBCPP_HIDE_FROM_ABI constexpr bool __lexicographical_compare_unwrap(
_Iter1 __first1,
_Sent1 __last1,
_Iter2 __first2,
@@ -39,15 +41,16 @@ struct __fn {
_Comp& __comp,
_Proj1& __proj1,
_Proj2& __proj2) {
- while (__first2 != __last2) {
- if (__first1 == __last1 || std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2)))
- return true;
- if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1)))
- return false;
- ++__first1;
- ++__first2;
- }
- return false;
+ auto [__first1_un, __last1_un] = std::__unwrap_range(std::move(__first1), std::move(__last1));
+ auto [__first2_un, __last2_un] = std::__unwrap_range(std::move(__first2), std::move(__last2));
+ return std::__lexicographical_compare(
+ std::move(__first1_un),
+ std::move(__last1_un),
+ std::move(__first2_un),
+ std::move(__last2_un),
+ __comp,
+ __proj1,
+ __proj2);
}
template <input_iterator _Iter1,
@@ -65,7 +68,7 @@ struct __fn {
_Comp __comp = {},
_Proj1 __proj1 = {},
_Proj2 __proj2 = {}) const {
- return __lexicographical_compare_impl(
+ return __lexicographical_compare_unwrap(
std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __comp, __proj1, __proj2);
}
@@ -77,7 +80,7 @@ struct __fn {
_Comp = ranges::less>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(
_Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const {
- return __lexicographical_compare_impl(
+ return __lexicographical_compare_unwrap(
ranges::begin(__range1),
ranges::end(__range1),
ranges::begin(__range2),
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 6cdb89d6b449bc..d41c80c6924376 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -418,6 +418,14 @@ struct _LIBCPP_TEMPLATE_VIS less<void>
};
#endif
+template <class _Tp>
+struct __is_trivial_less_than_predicate<less<_Tp>, _Tp, _Tp> : true_type {};
+
+#if _LIBCPP_STD_VER >= 14
+template <class _Tp, class _Up>
+struct __is_trivial_less_than_predicate<less<>, _Tp, _Up> : true_type {};
+#endif
+
#if _LIBCPP_STD_VER >= 14
template <class _Tp = void>
#else
diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h
index c344fc38f98ddd..5da79655a65a7d 100644
--- a/libcxx/include/__functional/ranges_operations.h
+++ b/libcxx/include/__functional/ranges_operations.h
@@ -98,6 +98,9 @@ struct greater_equal {
template <class _Lhs, class _Rhs>
struct __is_trivial_equality_predicate<ranges::equal_to, _Lhs, _Rhs> : true_type {};
+template <class _Lhs, class _Rhs>
+struct __is_trivial_less_than_predicate<ranges::less, _Lhs, _Rhs> : true_type {};
+
#endif // _LIBCPP_STD_VER >= 20
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__type_traits/predicate_traits.h b/libcxx/include/__type_traits/predicate_traits.h
index 872608e6ac3be3..815847cce41e07 100644
--- a/libcxx/include/__type_traits/predicate_traits.h
+++ b/libcxx/include/__type_traits/predicate_traits.h
@@ -21,6 +21,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Pred, class _Lhs, class _Rhs>
struct __is_trivial_equality_predicate : false_type {};
+template <class _Pred, class _Lhs, class _Rhs>
+struct __is_trivial_less_than_predicate : false_type {};
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___TYPE_TRAITS_PREDICATE_TRAITS
More information about the libcxx-commits
mailing list