[libcxx-commits] [libcxx] Fix endianess for algorithm mismatch (PR #93082)
Zibi Sarbinowski via libcxx-commits
libcxx-commits at lists.llvm.org
Wed May 22 11:07:54 PDT 2024
https://github.com/zibi2 created https://github.com/llvm/llvm-project/pull/93082
This PR fixes the `std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp` test on big-endian platforms such as z/OS.
>From fa56dba875e527e7ecc21dae5805cdafec238acf Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Wed, 22 May 2024 18:11:27 +0000
Subject: [PATCH] Fix endianess for algorithm mismatch
---
libcxx/include/__algorithm/mismatch.h | 47 ++++++++++++++++++++++++---
1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 632bec02406a4..a8219f0817a6c 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,6 +56,39 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
+// Returns __cmp_res with its 8 lanes in reverse order when _LIBCPP_BIG_ENDIAN is defined; otherwise the vector is
+// returned unchanged. __mismatch_vectorized applies this to a comparison result before passing it to
+// std::__all_of / std::__find_first_not_set.
+//
+// The argument is taken by value, like the 16- and 32-lane overloads, so the caller's vector is never modified
+// behind its back and temporaries are accepted as well.
+template <class _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8>
+__reverse_vector(__simd_vector<_Tp, 8> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  // This overload is only meant for element types whose native vector holds exactly 8 lanes.
+  static_assert(__native_vector_size<_Tp> == 8, "The __native_vector_size has to be 8");
+  // Indices 7..0 select the lanes of the (single) input vector back to front.
+  __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+  return __cmp_res;
+}
+
+// 16-lane overload: when _LIBCPP_BIG_ENDIAN is defined, yields a copy of __cmp_res whose lanes are in reverse
+// order; on every other configuration the input is handed back untouched.
+template <class _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16>
+__reverse_vector(__simd_vector<_Tp, 16> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  // Only valid for element types whose native vector holds exactly 16 lanes.
+  static_assert(__native_vector_size<_Tp> == 16, "The __native_vector_size has to be 16");
+  // Pick the lanes of the input back to front (15 down to 0).
+  return __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#else
+  return __cmp_res;
+#endif
+}
+
+// 32-lane overload: when _LIBCPP_BIG_ENDIAN is defined, yields a copy of __cmp_res whose lanes are in reverse
+// order; on every other configuration the input is handed back untouched.
+template <class _Tp, __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32>
+__reverse_vector(__simd_vector<_Tp, 32> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  // Only valid for element types whose native vector holds exactly 32 lanes.
+  static_assert(__native_vector_size<_Tp> == 32, "The __native_vector_size has to be 32");
+  // Pick the lanes of the input back to front (31 down to 0).
+  return __builtin_shufflevector(__cmp_res, __cmp_res, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#else
+  return __cmp_res;
+#endif
+}
+
template <class _Iter>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -77,7 +110,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
}
for (size_t __i = 0; __i != __unroll_count; ++__i) {
- if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
+ auto __cmp_res = __lhs[__i] == __rhs[__i];
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -89,8 +124,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
- if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- !std::__all_of(__cmp_res)) {
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -106,8 +142,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
- auto __offset =
- std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
More information about the libcxx-commits
mailing list