[libcxx-commits] [libcxx] optimize minmax_element (PR #135495)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Apr 13 22:16:23 PDT 2025
https://github.com/wsehjk updated https://github.com/llvm/llvm-project/pull/135495
>From 28f7ed6a5692a6311c6ebe338de6c7d909e437df Mon Sep 17 00:00:00 2001
From: shiwen wang <Wse1714401046 at gmail.com>
Date: Sat, 12 Apr 2025 22:42:32 +0800
Subject: [PATCH 1/5] optimize minmax_element
---
libcxx/include/__algorithm/minmax_element.h | 52 +++++++++++++++++++--
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index dc0c3a818cd57..9f6ca60267e42 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -15,6 +15,7 @@
#include <__iterator/iterator_traits.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
+#include <__type_traits/is_integral.h>
#include <__utility/pair.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -38,9 +39,10 @@ class _MinmaxElementLessFunc {
}
};
-template <class _Iter, class _Sent, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
-__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+template<class _Iter, class _Sent, class _Proj, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+ __builtin_printf("Debug: __minmax_element_impl called, %d\n", __LINE__); // 不需要 iostream
auto __less = _MinmaxElementLessFunc<_Comp, _Proj>(__comp, __proj);
pair<_Iter, _Iter> __result(__first, __first);
@@ -78,6 +80,50 @@ __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
return __result;
}
+
+// template<class _Tp>
+// typename std::iterator_traits<_Iter>::value_type
+// __minmax_element_vectorized(_Tp __first, _Tp __last) {
+
+// }
+
+
+template <class _Iter, class _Proj, class _Comp,
+ __enable_if_t<is_integral_v<typename std::iterator_traits<_Iter>::value_type>
+ && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ int> = 0
+ >
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
+ if (__libcpp_is_constant_evaluated()) {
+ return __minmax_element_loop(__first, __last, __comp, __proj);
+ } else {
+
+ }
+}
+
+template <class _Iter, class _Proj, class _Comp,
+ __enable_if_t<!is_integral_v<typename std::iterator_traits<_Iter>::value_type>
+ && __can_map_to_integer_v<typename std::iterator_traits<_Iter>::value_type>
+ && __libcpp_is_trivially_equality_comparable<typename std::iterator_traits<_Iter>::value_type, typename std::iterator_traits<_Iter>::value_type>::value
+ && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ int> = 0
+ >
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
+ if (__libcpp_is_constant_evaluated()) {
+ return __minmax_element_loop(__first, __last, __comp, __proj);
+ } else {
+
+ }
+}
+
+template <class _Iter, class _Sent, class _Proj, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+ return std::__minmax_element_loop(__first, __last, __comp, __proj);
+}
+
template <class _ForwardIterator, class _Compare>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
>From 57b9d7aadc5f13e224b9d84bbaabdaad0c120716 Mon Sep 17 00:00:00 2001
From: shiwen wang <Wse1714401046 at gmail.com>
Date: Mon, 14 Apr 2025 01:04:40 +0800
Subject: [PATCH 2/5] optimize minmax_element
---
libcxx/include/__algorithm/minmax_element.h | 132 +++++++++++++++++---
1 file changed, 112 insertions(+), 20 deletions(-)
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 9f6ca60267e42..47605d9ce2966 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -10,6 +10,8 @@
#define _LIBCPP___ALGORITHM_MINMAX_ELEMENT_H
#include <__algorithm/comp.h>
+#include <__algorithm/simd_utils.h>
+#include <__algorithm/unwrap_iter.h>
#include <__config>
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
@@ -81,32 +83,105 @@ __minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
}
-// template<class _Tp>
-// typename std::iterator_traits<_Iter>::value_type
-// __minmax_element_vectorized(_Tp __first, _Tp __last) {
+template<class _Iter>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_vectorized(_Iter __first, _Iter __last) {
+ using __value_type = __iter_value_type<_Iter>;
+ constexpr size_t __unroll_count = 4;
+ constexpr size_t __vec_size = __native_vector_size<__value_type>;
+ using __vec_type = __simd_vector<__value_type, __vec_size>;
+ if (__last == __first) [[__unlikely__]] {
+ return {__first, __first};
+ }
-// }
+ __value_type __min_element = *__first;
+ __value_type __max_element = *__first;
+
+ _Iter __min_block_start = __first;
+ _Iter __min_block_end = __last + 1;
+ _Iter __max_block_start = __first;
+ _Iter __max_block_end = __last + 1;
+
+ while(static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__likely__]]{
+ __vec_type __vec[__unroll_count];
+ for(size_t __i = 0; __i < __unroll_count; ++__i) {
+ __vec[__i] = std::__load_vector<__vec_type>(__first + __i * __vec_size);
+ // min
+ auto __block_min_element = __builtin_reduce_min(__vec[__i]);
+ if (__block_min_element < __min_element) {
+ __min_element = __block_min_element;
+ __min_block_start = __first + __i * __vec_size;
+ __min_block_start = __first + (__i + 1) * __vec_size;
+ }
+ // max
+ auto __block_max_element = __builtin_reduce_max(__vec[__i]);
+ if (__block_max_element > __max_element) {
+ __max_element = __block_max_element;
+ __max_block_start = __first + __i * __vec_size;
+ __max_block_start = __first + (__i + 1) * __vec_size;
+ }
+ }
+ __first += __unroll_count * __vec_size;
+ }
+ // remaining vectors
+ while(static_cast<size_t>(__last - __first) >= __vec_size) {
+ __vec_type __vec = std::__load_vector<__vec_type>(__first + __vec_size);
+ auto __block_min_element = __builtin_reduce_min(__vec);
+ if (__block_min_element < __min_element) {
+ __min_element = __block_min_element;
+ __min_block_start = __first + __i * __vec_size;
+ __min_block_start = __first + (__i + 1) * __vec_size;
+ }
+ // max
+ auto __block_max_element = __builtin_reduce_max(__vec);
+ if (__block_max_element > __max_element) {
+ __max_element = __block_max_element;
+ __max_block_start = __first + __i * __vec_size;
+ __max_block_start = __first + (__i + 1) * __vec_size;
+ }
+ __first += __vec_size;
+ }
-template <class _Iter, class _Proj, class _Comp,
- __enable_if_t<is_integral_v<typename std::iterator_traits<_Iter>::value_type>
- && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
- int> = 0
- >
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
-__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
- if (__libcpp_is_constant_evaluated()) {
- return __minmax_element_loop(__first, __last, __comp, __proj);
- } else {
+ if (__last > __first) {
+ __less_tag __pred;
+ __identity __proj;
+ auto __epilogue = std::__minmax_element_loop(__first, __last, __pred, __proj);
+ auto __epilogue_min_element = *__epilogue.first;
+ auto __epilogue_max_element = *__epilogue.second;
+ if (__epilogue_min_element < __min_element && __epilogue_max_element > __max_element) {
+ return __epilogue;
+ } else if (__epilogue_min_element < __min_element) {
+ __min_element = __epilogue_min_element;
+ __min_block_start = __first;
+ __min_block_end = __last;
+ } else {
+ __max_element = __epilogue_max_element;
+ __max_block_start = __first;
+ __max_block_end = __last;
+ }
+ }
+ // locate min
+ for(; __min_block_start != __min_block_end; ++__min_block_start) {
+ if (*__min_block_start == __min_element)
+ break;
}
+
+ for(; __max_block_start != __max_block_end; ++__max_block_start) {
+ if (*__max_block_start == __max_element)
+ break;
+ }
+
+ return {__min_block_start, __max_block_start};
}
template <class _Iter, class _Proj, class _Comp,
- __enable_if_t<!is_integral_v<typename std::iterator_traits<_Iter>::value_type>
- && __can_map_to_integer_v<typename std::iterator_traits<_Iter>::value_type>
- && __libcpp_is_trivially_equality_comparable<typename std::iterator_traits<_Iter>::value_type, typename std::iterator_traits<_Iter>::value_type>::value
- && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ __enable_if_t
+ <is_integral_v<__iter_value_type<_Iter>>
+ && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
+ && __is_identity<_Proj>::value
+ && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
int> = 0
>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
@@ -114,10 +189,27 @@ __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj)
if (__libcpp_is_constant_evaluated()) {
return __minmax_element_loop(__first, __last, __comp, __proj);
} else {
-
+ auto __res = std::__minmax_element_vectorized(std::__unwrap_iter(__first), std::__unwrap_iter(__last));
+ return {std::__rewrap_iter(__first, __res.first), std::__rewrap_iter(__first, __res.second)};
}
}
-
+// template <class _Iter, class _Proj, class _Comp,
+// __enable_if_t
+// <!is_integral_v<__iter_value_type<_Iter>>
+// && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
+// && __can_map_to_integer_v<__iter_value_type<_Iter>>
+// && __libcpp_is_trivially_equality_comparable<__iter_value_type<_Iter>, __iter_value_type<_Iter>>::value
+// && __is_identity<_Proj>::value
+// && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+// int> = 0
+// >
+// _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+// __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
+// if (__libcpp_is_constant_evaluated()) {
+// return __minmax_element_loop(__first, __last, __comp, __proj);
+// } else {
+// }
+// }
template <class _Iter, class _Sent, class _Proj, class _Comp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
>From 88e9665f441c217a4cd691a0ad8abd8eeb9862bf Mon Sep 17 00:00:00 2001
From: shiwen wang <Wse1714401046 at gmail.com>
Date: Mon, 14 Apr 2025 01:18:05 +0800
Subject: [PATCH 3/5] process epilogue
---
libcxx/include/__algorithm/minmax_element.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 47605d9ce2966..3cd63eaeed9a6 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -44,7 +44,6 @@ class _MinmaxElementLessFunc {
template<class _Iter, class _Sent, class _Proj, class _Comp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
- __builtin_printf("Debug: __minmax_element_impl called, %d\n", __LINE__); // 不需要 iostream
auto __less = _MinmaxElementLessFunc<_Comp, _Proj>(__comp, __proj);
pair<_Iter, _Iter> __result(__first, __first);
@@ -154,11 +153,11 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
} else if (__epilogue_min_element < __min_element) {
__min_element = __epilogue_min_element;
__min_block_start = __first;
- __min_block_end = __last;
+ __min_block_end = __first; // this is global min_element
} else {
__max_element = __epilogue_max_element;
__max_block_start = __first;
- __max_block_end = __last;
+ __max_block_end = __first; // this is global max_element
}
}
@@ -168,6 +167,7 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
break;
}
+ // locate max
for(; __max_block_start != __max_block_end; ++__max_block_start) {
if (*__max_block_start == __max_element)
break;
>From 813726fd7aee7e320a9f68fa160bd411813c7a34 Mon Sep 17 00:00:00 2001
From: shiwen wang <Wse1714401046 at gmail.com>
Date: Mon, 14 Apr 2025 11:02:43 +0800
Subject: [PATCH 4/5] fix bug
---
libcxx/include/__algorithm/minmax_element.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 3cd63eaeed9a6..7f8006fd15dad 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -125,19 +125,19 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
// remaining vectors
while(static_cast<size_t>(__last - __first) >= __vec_size) {
- __vec_type __vec = std::__load_vector<__vec_type>(__first + __vec_size);
+ __vec_type __vec = std::__load_vector<__vec_type>(__first);
auto __block_min_element = __builtin_reduce_min(__vec);
if (__block_min_element < __min_element) {
__min_element = __block_min_element;
- __min_block_start = __first + __i * __vec_size;
- __min_block_start = __first + (__i + 1) * __vec_size;
+ __min_block_start = __first;
+ __min_block_start = __first + __vec_size;
}
// max
auto __block_max_element = __builtin_reduce_max(__vec);
if (__block_max_element > __max_element) {
__max_element = __block_max_element;
- __max_block_start = __first + __i * __vec_size;
- __max_block_start = __first + (__i + 1) * __vec_size;
+ __max_block_start = __first;
+ __max_block_start = __first + __vec_size;
}
__first += __vec_size;
}
>From f7a751c0f0ca5c6f2bfe224417968e37baaf25dd Mon Sep 17 00:00:00 2001
From: shiwen wang <Wse1714401046 at gmail.com>
Date: Mon, 14 Apr 2025 13:14:49 +0800
Subject: [PATCH 5/5] correct impl
---
libcxx/include/__algorithm/minmax_element.h | 23 +++++++++++----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 7f8006fd15dad..933e924c88834 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -88,7 +88,7 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
using __value_type = __iter_value_type<_Iter>;
constexpr size_t __unroll_count = 4;
constexpr size_t __vec_size = __native_vector_size<__value_type>;
- using __vec_type = __simd_vector<__value_type, __vec_size>;
+ using __vec_type = __simd_vector<__value_type, __vec_size>;
if (__last == __first) [[__unlikely__]] {
return {__first, __first};
}
@@ -105,19 +105,19 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
__vec_type __vec[__unroll_count];
for(size_t __i = 0; __i < __unroll_count; ++__i) {
__vec[__i] = std::__load_vector<__vec_type>(__first + __i * __vec_size);
- // min
+ // block min
auto __block_min_element = __builtin_reduce_min(__vec[__i]);
if (__block_min_element < __min_element) {
__min_element = __block_min_element;
__min_block_start = __first + __i * __vec_size;
- __min_block_start = __first + (__i + 1) * __vec_size;
+ __min_block_end = __first + (__i + 1) * __vec_size;
}
- // max
+ // block max
auto __block_max_element = __builtin_reduce_max(__vec[__i]);
if (__block_max_element > __max_element) {
__max_element = __block_max_element;
__max_block_start = __first + __i * __vec_size;
- __max_block_start = __first + (__i + 1) * __vec_size;
+ __max_block_end = __first + (__i + 1) * __vec_size;
}
}
__first += __unroll_count * __vec_size;
@@ -130,22 +130,22 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
if (__block_min_element < __min_element) {
__min_element = __block_min_element;
__min_block_start = __first;
- __min_block_start = __first + __vec_size;
+ __min_block_end = __first + __vec_size;
}
// max
auto __block_max_element = __builtin_reduce_max(__vec);
if (__block_max_element > __max_element) {
__max_element = __block_max_element;
__max_block_start = __first;
- __max_block_start = __first + __vec_size;
+ __max_block_end = __first + __vec_size;
}
__first += __vec_size;
}
if (__last > __first) {
- __less_tag __pred;
- __identity __proj;
- auto __epilogue = std::__minmax_element_loop(__first, __last, __pred, __proj);
+ auto __comp = std::__less<>{};
+ std::__identity __proj;
+ auto __epilogue = std::__minmax_element_loop(__first, __last, __comp, __proj);
auto __epilogue_min_element = *__epilogue.first;
auto __epilogue_max_element = *__epilogue.second;
if (__epilogue_min_element < __min_element && __epilogue_max_element > __max_element) {
@@ -154,7 +154,7 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
__min_element = __epilogue_min_element;
__min_block_start = __first;
__min_block_end = __first; // this is global min_element
- } else {
+ } else { // __epilogue_max_element > __max_element
__max_element = __epilogue_max_element;
__max_block_start = __first;
__max_block_end = __first; // this is global max_element
@@ -173,6 +173,7 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
break;
}
+
return {__min_block_start, __max_block_start};
}
More information about the libcxx-commits
mailing list