[libcxx-commits] [libcxx] [libc++][PSTL] Introduce cpu traits (PR #88134)
via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Apr 9 07:44:44 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Louis Dionne (ldionne)
<details>
<summary>Changes</summary>
Currently, CPU backends in the PSTL are created by defining functions
in the __par_backend namespace. Then, the PSTL includes the CPU backend
that gets configured via CMake and gets those definitions.
This prevents CPU backends from easily co-existing and is a bit confusing.
To solve this problem, this patch introduces the notion of __cpu_traits,
which is a cheap encapsulation of the basis operations required to
implement a CPU-based PSTL. Different backends can now define their own
tag and coexist, and the CPU-based PSTL will simply use __cpu_traits to
dispatch to the right implementation of e.g. __for_each.
Note that this patch doesn't change the actual implementation of the
backends in any way, it only modifies how that implementation is accessed
to implement PSTL algorithms.
This patch is a step towards #<!-- -->88131.
---
Patch is 55.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88134.diff
18 Files Affected:
- (modified) libcxx/include/CMakeLists.txt (+1)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backend.h (-45)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h (+5-4)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h (+7-3)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h (+2-1)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h (+12-10)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h (+2-1)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h (+231-226)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/merge.h (+2-1)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/serial.h (+50-48)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/stable_sort.h (+2-1)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/thread.h (+50-46)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/transform.h (+3-2)
- (modified) libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h (+11-7)
- (added) libcxx/include/__pstl/cpu_algos/cpu_traits.h (+84)
- (modified) libcxx/include/module.modulemap (+2)
- (modified) libcxx/src/pstl/libdispatch.cpp (+2-5)
- (modified) libcxx/utils/generate_iwyu_mapping.py (+1)
``````````diff
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 097a41d4c41740..1f90dd6db5b158 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -591,6 +591,7 @@ set(files
__numeric/transform_exclusive_scan.h
__numeric/transform_inclusive_scan.h
__numeric/transform_reduce.h
+ __pstl/cpu_algos/cpu_traits.h
__random/bernoulli_distribution.h
__random/binomial_distribution.h
__random/cauchy_distribution.h
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backend.h b/libcxx/include/__algorithm/pstl_backends/cpu_backend.h
index 6980ded189ea2a..c93139243af459 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backend.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backend.h
@@ -10,51 +10,6 @@
#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKEND_H
#include <__config>
-
-/*
-
- // _Functor takes a subrange for [__first, __last) that should be executed in serial
- template <class _RandomAccessIterator, class _Functor>
- optional<__empty> __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func);
-
- template <class _Iterator, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduction>
- optional<_Tp>
- __parallel_transform_reduce(_Iterator __first, _Iterator __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduction);
-
- // Cancel the execution of other jobs - they aren't needed anymore
- void __cancel_execution();
-
- template <class _RandomAccessIterator1,
- class _RandomAccessIterator2,
- class _RandomAccessIterator3,
- class _Compare,
- class _LeafMerge>
- optional<void> __parallel_merge(
- _RandomAccessIterator1 __first1,
- _RandomAccessIterator1 __last1,
- _RandomAccessIterator2 __first2,
- _RandomAccessIterator2 __last2,
- _RandomAccessIterator3 __outit,
- _Compare __comp,
- _LeafMerge __leaf_merge);
-
- template <class _RandomAccessIterator, class _Comp, class _LeafSort>
- void __parallel_stable_sort(_RandomAccessIterator __first,
- _RandomAccessIterator __last,
- _Comp __comp,
- _LeafSort __leaf_sort);
-
- TODO: Document the parallel backend
-
-Exception handling
-==================
-
-CPU backends are expected to report errors (i.e. failure to allocate) by returning a disengaged `optional` from their
-implementation. Exceptions shouldn't be used to report an internal failure-to-allocate, since all exceptions are turned
-into a program termination at the front-end level. When a backend returns a disengaged `optional` to the frontend, the
-frontend will turn that into a call to `std::__throw_bad_alloc();` to report the internal failure to the user.
-*/
-
#include <__algorithm/pstl_backends/cpu_backends/any_of.h>
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__algorithm/pstl_backends/cpu_backends/fill.h>
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h
index 13dff80086e72b..be5e54f3fa5c85 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/any_of.h
@@ -17,6 +17,7 @@
#include <__config>
#include <__functional/operations.h>
#include <__iterator/concepts.h>
+#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -30,13 +31,13 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _Index, class _Brick>
+template <class _Backend, class _Index, class _Brick>
_LIBCPP_HIDE_FROM_ABI optional<bool> __parallel_or(_Index __first, _Index __last, _Brick __f) {
std::atomic<bool> __found(false);
- auto __ret = __par_backend::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) {
+ auto __ret = __pstl::__cpu_traits<_Backend>::__parallel_for(__first, __last, [__f, &__found](_Index __i, _Index __j) {
if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) {
__found.store(true, std::memory_order_relaxed);
- __par_backend::__cancel_execution();
+ __pstl::__cpu_traits<_Backend>::__cancel_execution();
}
});
if (!__ret)
@@ -74,7 +75,7 @@ _LIBCPP_HIDE_FROM_ABI optional<bool>
__pstl_any_of(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_or(
+ return std::__parallel_or<__cpu_backend_tag>(
__first, __last, [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
auto __res = std::__pstl_any_of<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __pred);
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h
index ea2210a4a7adbd..0641a51e6823e3 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h
@@ -30,9 +30,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-struct __cpu_backend_tag {};
-
-inline constexpr size_t __lane_size = 64;
+# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL)
+using __cpu_backend_tag = __pstl::__serial_backend_tag;
+# elif defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD)
+using __cpu_backend_tag = __pstl::__std_thread_backend_tag;
+# elif defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH)
+using __cpu_backend_tag = __pstl::__libdispatch_backend_tag;
+# endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h
index 64babe9fd2bdae..49a32f6c5ce551 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/fill.h
@@ -13,6 +13,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
+#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
@@ -39,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_fill(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return __par_backend::__parallel_for(
+ return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
__first, __last, [&__value](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_fill<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __value);
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h
index 170470e4fb7edd..11a5668bf25af1 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/find_if.h
@@ -16,6 +16,7 @@
#include <__functional/operations.h>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
+#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -33,7 +34,7 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <class _Index, class _Brick, class _Compare>
+template <class _Backend, class _Index, class _Brick, class _Compare>
_LIBCPP_HIDE_FROM_ABI optional<_Index>
__parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
@@ -41,8 +42,8 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
_DifferenceType __initial_dist = __b_first ? __n : -1;
std::atomic<_DifferenceType> __extremum(__initial_dist);
// TODO: find out what is better here: parallel_for or parallel_reduce
- auto __res =
- __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
+ auto __res = __pstl::__cpu_traits<_Backend>::__parallel_for(
+ __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
// See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
// why using a shared variable scales fairly well in this situation.
if (__comp(__i - __first, __extremum)) {
@@ -61,12 +62,12 @@ __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool
return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
}
-template <class _Index, class _DifferenceType, class _Compare>
+template <class _Backend, class _Index, class _DifferenceType, class _Compare>
_LIBCPP_HIDE_FROM_ABI _Index
__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
// Experiments show good block sizes like this
- const _DifferenceType __block_size = 8;
- alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
+ const _DifferenceType __block_size = 8;
+ alignas(__pstl::__cpu_traits<_Backend>::__lane_size) _DifferenceType __lane[__block_size] = {0};
while (__end - __begin >= __block_size) {
_DifferenceType __found = 0;
_PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
@@ -102,7 +103,7 @@ _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
__pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__parallel_find(
+ return std::__parallel_find<__cpu_backend_tag>(
__first,
__last,
[&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
@@ -116,9 +117,10 @@ __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __l
} else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
using __diff_t = __iter_diff_t<_ForwardIterator>;
- return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
- return __pred(__iter[__i]);
- });
+ return std::__simd_first<__cpu_backend_tag>(
+ __first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
+ return __pred(__iter[__i]);
+ });
} else {
return std::find_if(__first, __last, __pred);
}
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h
index 81fd4526b8dbf1..1667ec0f0c4f41 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/for_each.h
@@ -13,6 +13,7 @@
#include <__algorithm/pstl_backends/cpu_backends/backend.h>
#include <__config>
#include <__iterator/concepts.h>
+#include <__pstl/cpu_algos/cpu_traits.h>
#include <__type_traits/is_execution_policy.h>
#include <__utility/empty.h>
#include <optional>
@@ -39,7 +40,7 @@ _LIBCPP_HIDE_FROM_ABI optional<__empty>
__pstl_for_each(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Functor __func) {
if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
__has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
- return std::__par_backend::__parallel_for(
+ return __pstl::__cpu_traits<__cpu_backend_tag>::__parallel_for(
__first, __last, [__func](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
[[maybe_unused]] auto __res = std::__pstl_for_each<__remove_parallel_policy_t<_ExecutionPolicy>>(
__cpu_backend_tag{}, __brick_first, __brick_last, __func);
diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
index e885e7f225172c..8757f249680375 100644
--- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
+++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h
@@ -23,6 +23,7 @@
#include <__memory/construct_at.h>
#include <__memory/unique_ptr.h>
#include <__numeric/reduce.h>
+#include <__pstl/cpu_algos/cpu_traits.h>
#include <__utility/empty.h>
#include <__utility/exception_guard.h>
#include <__utility/move.h>
@@ -37,10 +38,11 @@ _LIBCPP_PUSH_MACROS
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
_LIBCPP_BEGIN_NAMESPACE_STD
+namespace __pstl {
-namespace __par_backend {
-inline namespace __libdispatch {
+struct __libdispatch_backend_tag {};
+namespace __libdispatch {
// ::dispatch_apply is marked as __attribute__((nothrow)) because it doesn't let exceptions propagate, and neither do
// we.
// TODO: Do we want to add [[_Clang::__callback__(__func, __context, __)]]?
@@ -77,267 +79,270 @@ __dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator _
return __empty{};
}
+} // namespace __libdispatch
-template <class _RandomAccessIterator, class _Functor>
-_LIBCPP_HIDE_FROM_ABI optional<__empty>
-__parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
- return __libdispatch::__dispatch_parallel_for(
- __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
-}
-
-template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
-struct __merge_range {
- __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
- : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}
+template <>
+struct __cpu_traits<__libdispatch_backend_tag> {
+ template <class _RandomAccessIterator, class _Functor>
+ _LIBCPP_HIDE_FROM_ABI static optional<__empty>
+ __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
+ return __libdispatch::__dispatch_parallel_for(
+ __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
+ }
- _RandomAccessIterator1 __mid1_;
- _RandomAccessIterator2 __mid2_;
- _RandomAccessIteratorOut __result_;
-};
+ template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
+ struct __merge_range {
+ __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
+ : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}
-template <typename _RandomAccessIterator1,
- typename _RandomAccessIterator2,
- typename _RandomAccessIterator3,
- typename _Compare,
- typename _LeafMerge>
-_LIBCPP_HIDE_FROM_ABI optional<__empty> __parallel_merge(
- _RandomAccessIterator1 __first1,
- _RandomAccessIterator1 __last1,
- _RandomAccessIterator2 __first2,
- _RandomAccessIterator2 __last2,
- _RandomAccessIterator3 __result,
- _Compare __comp,
- _LeafMerge __leaf_merge) noexcept {
- __chunk_partitions __partitions =
- __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));
-
- if (__partitions.__chunk_count_ == 0)
- return __empty{};
+ _RandomAccessIterator1 __mid1_;
+ _RandomAccessIterator2 __mid2_;
+ _RandomAccessIteratorOut __result_;
+ };
- if (__partitions.__chunk_count_ == 1) {
- __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
- return __empty{};
- }
+ template <typename _RandomAccessIterator1,
+ typename _RandomAccessIterator2,
+ typename _RandomAccessIterator3,
+ typename _Compare,
+ typename _LeafMerge>
+ _LIBCPP_HIDE_FROM_ABI static optional<__empty> __parallel_merge(
+ _RandomAccessIterator1 __first1,
+ _RandomAccessIterator1 __last1,
+ _RandomAccessIterator2 __first2,
+ _RandomAccessIterator2 __last2,
+ _RandomAccessIterator3 __result,
+ _Compare __comp,
+ _LeafMerge __leaf_merge) noexcept {
+ __libdispatch::__chunk_partitions __partitions =
+ __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));
+
+ if (__partitions.__chunk_count_ == 0)
+ return __empty{};
+
+ if (__partitions.__chunk_count_ == 1) {
+ __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
+ return __empty{};
+ }
- using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
- auto const __n_ranges = __partitions.__chunk_count_ + 1;
+ using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
+ auto const __n_ranges = __partitions.__chunk_count_ + 1;
- // TODO: use __uninitialized_buffer
- auto __destroy = [=](__merge_range_t* __ptr) {
- std::destroy_n(__ptr, __n_ranges);
- std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
- };
+ // TODO: use __uninitialized_buffer
+ auto __destroy = [=](__merge_range_t* __ptr) {
+ std::destroy_n(__ptr, __n_ranges);
+ std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
+ };
- unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
- [&]() -> __merge_range_t* {
+ unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
+ [&]() -> __merge_range_t* {
# ifndef _LIBCPP_HAS_NO_EXCEPTIONS
- try {
+ try {
# endif
- return std::allocator<__merge_range_t>().allocate(__n_ranges);
+ return std::allocator<__merge_range_t>().allocate(__n_ranges);
# ifndef _LIBCPP_HAS_NO_EXCEPTIONS
- } catch (const std::bad_alloc&) {
- return nullptr;
- }
+ } catch (const std::bad_alloc&) {
+ return nullptr;
+ }
# endif
- }(),
- __destroy);
-
- if (!__ranges)
- return nullopt;
+ }(),
+ __destroy);
+
+ if (!__ranges)
+ return nullopt;
+
+ // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
+ __merge_range_t* __r = __ranges.get();
+ std::__construct_at(__r++, __first1, __first2, __result);
+
+ bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
+
+ auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t {
+ auto [__mid1, __mid2] = [&] {
+ if (__iterate_first_range) {
+ auto __m1 = __first1 + __chunk_size;
+ auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp);
+ return std::make_pair(__m1, __m2);
+ } else {
+ auto __m2 = __first2 + __chunk_size;
+ auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp);
+ return std::make_pair(__m1, __m2);
+ }
+ }();
- // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
- __merge_range_t* __r = __ranges.get();
- std::__construct_at(__r++, __first1, __first2, __result);
+ __result += (__mid1 - __first1) + (__mid2 - __first2);
+ __first1 = __mid1;
+ __first2 = __mid2;
+ return {std::move(__mid1), std::move(__mid2), __result};
+ };
- bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
+ // handle first chunk
+ std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_));
- auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t {
- auto [__mid1, __mid2] =...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88134
More information about the libcxx-commits
mailing list