[libcxx-commits] [libcxx] [libc++] Fix endianness for algorithm mismatch (PR #93082)
Zibi Sarbinowski via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jun 3 11:37:01 PDT 2024
https://github.com/zibi2 updated https://github.com/llvm/llvm-project/pull/93082
>From e366cb1d384d08387bb2cb106c5991ec57a0fb55 Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Wed, 22 May 2024 18:11:27 +0000
Subject: [PATCH 1/6] Fix endianness for algorithm mismatch
---
libcxx/include/__algorithm/mismatch.h | 47 ++++++++++++++++++++++++---
1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 632bec02406a4..a8219f0817a6c 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,6 +56,39 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
+template <class _Tp,
+ __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8>
+__reverse_vector(__simd_vector<_Tp, 8>& __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Tp> == 8, "The __native_vector_size has to be 8");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+ return __cmp_res;
+}
+
+template <class _Tp,
+ __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16>
+__reverse_vector(__simd_vector<_Tp, 16> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Tp> == 16, "The __native_vector_size has to be 16");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+ return __cmp_res;
+}
+
+template <class _Tp,
+ __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32>
+__reverse_vector(__simd_vector<_Tp, 32> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Tp> == 32, "The __native_vector_size has to be 32");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+ return __cmp_res;
+}
+
template <class _Iter>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -77,7 +110,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
}
for (size_t __i = 0; __i != __unroll_count; ++__i) {
- if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
+ auto __cmp_res = __lhs[__i] == __rhs[__i];
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -89,8 +124,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
- if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- !std::__all_of(__cmp_res)) {
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -106,8 +142,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
- auto __offset =
- std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
>From 05395e0b07312d5a7594bdc1db46cf695001242c Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Wed, 22 May 2024 18:21:04 +0000
Subject: [PATCH 2/6] Update based on the latest changes
---
libcxx/include/__algorithm/mismatch.h | 83 ++++++++++++++++++---------
1 file changed, 57 insertions(+), 26 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index a8219f0817a6c..7b4e7da35cf7f 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,36 +56,67 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
-template <class _Tp,
- __enable_if_t<is_integral<_Tp>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8>
-__reverse_vector(__simd_vector<_Tp, 8>& __cmp_res) {
-#if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Tp> == 8, "The __native_vector_size has to be 8");
+template <class _Value_type>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 8>
+__reverse_vector(__simd_vector<_Value_type, 8>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Value_type> == 8, "The __native_vector_size has to be 8");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
-#endif
+# endif
return __cmp_res;
}
-template <class _Tp,
- __enable_if_t<is_integral<_Tp>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16>
-__reverse_vector(__simd_vector<_Tp, 16> __cmp_res) {
-#if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Tp> == 16, "The __native_vector_size has to be 16");
+template <class _Value_type>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 16>
+__reverse_vector(__simd_vector<_Value_type, 16> __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Value_type> == 16, "The __native_vector_size has to be 16");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-#endif
+# endif
return __cmp_res;
}
-template <class _Tp,
- __enable_if_t<is_integral<_Tp>::value, int> = 0>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32>
-__reverse_vector(__simd_vector<_Tp, 32> __cmp_res) {
-#if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Tp> == 32, "The __native_vector_size has to be 32");
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-#endif
+template <class _Value_type>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 32>
+__reverse_vector(__simd_vector<_Value_type, 32> __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_Value_type> == 32, "The __native_vector_size has to be 32");
+ __cmp_res = __builtin_shufflevector(
+ __cmp_res,
+ __cmp_res,
+ 31,
+ 30,
+ 29,
+ 28,
+ 27,
+ 26,
+ 25,
+ 24,
+ 23,
+ 22,
+ 21,
+ 20,
+ 19,
+ 18,
+ 17,
+ 16,
+ 15,
+ 14,
+ 13,
+ 12,
+ 11,
+ 10,
+ 9,
+ 8,
+ 7,
+ 6,
+ 5,
+ 4,
+ 3,
+ 2,
+ 1,
+ 0);
+# endif
return __cmp_res;
}
@@ -111,7 +142,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
for (size_t __i = 0; __i != __unroll_count; ++__i) {
auto __cmp_res = __lhs[__i] == __rhs[__i];
- __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ __cmp_res = __reverse_vector<__value_type>(__cmp_res);
if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -125,7 +156,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+ __cmp_res = __reverse_vector<__value_type>(__cmp_res);
if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -143,8 +174,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- __cmp_res = __reverse_vector<_Tp>(__cmp_res);
- auto __offset = std::__find_first_not_set(__cmp_res);
+ __cmp_res = __reverse_vector<__value_type>(__cmp_res);
+ auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
>From b4ce87242c5eb820fe6fde076ecbaedeb78b427e Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Wed, 22 May 2024 22:45:20 +0000
Subject: [PATCH 3/6] Add more __reverse_vector overloads
---
libcxx/include/__algorithm/mismatch.h | 70 +++++++++++++++++++++------
1 file changed, 55 insertions(+), 15 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 7b4e7da35cf7f..8c4b4a0e031ed 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,31 +56,71 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
-template <class _Value_type>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 8>
-__reverse_vector(__simd_vector<_Value_type, 8>& __cmp_res) {
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 2>
+__reverse_vector(__simd_vector<long, 2>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Value_type> == 8, "The __native_vector_size has to be 8");
+ static_assert(__native_vector_size<long> == 2, "The __native_vector_size has to be 2");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
+# endif
+ return __cmp_res;
+}
+
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 4>
+__reverse_vector(__simd_vector<long, 4>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<long> == 4, "The __native_vector_size has to be 4");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
+# endif
+ return __cmp_res;
+}
+
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 8>
+__reverse_vector(__simd_vector<int, 8>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<int> == 8, "The __native_vector_size has to be 8");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
+# endif
+ return __cmp_res;
+}
+
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 4>
+__reverse_vector(__simd_vector<int, 4>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<int> == 4, "The __native_vector_size has to be 4");
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
+# endif
+ return __cmp_res;
+}
+
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 8>
+__reverse_vector(__simd_vector<_ValueType, 8>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ static_assert(__native_vector_size<_ValueType> == 8, "The __native_vector_size has to be 8");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
# endif
return __cmp_res;
}
-template <class _Value_type>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 16>
-__reverse_vector(__simd_vector<_Value_type, 16> __cmp_res) {
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 16>
+__reverse_vector(__simd_vector<_ValueType, 16> __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Value_type> == 16, "The __native_vector_size has to be 16");
+ static_assert(__native_vector_size<_ValueType> == 16, "The __native_vector_size has to be 16");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
# endif
return __cmp_res;
}
-template <class _Value_type>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 32>
-__reverse_vector(__simd_vector<_Value_type, 32> __cmp_res) {
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 32>
+__reverse_vector(__simd_vector<_ValueType, 32> __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_Value_type> == 32, "The __native_vector_size has to be 32");
+ static_assert(__native_vector_size<_ValueType> == 32, "The __native_vector_size has to be 32");
__cmp_res = __builtin_shufflevector(
__cmp_res,
__cmp_res,
@@ -142,7 +182,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
for (size_t __i = 0; __i != __unroll_count; ++__i) {
auto __cmp_res = __lhs[__i] == __rhs[__i];
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -156,7 +196,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -174,7 +214,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
>From 40cab2461b7efa5861fc01272baa3b196fb91c98 Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Fri, 24 May 2024 19:16:57 +0000
Subject: [PATCH 4/6] Try to fix windows CI
---
libcxx/include/__algorithm/mismatch.h | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 8c4b4a0e031ed..8519ef76140ed 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,11 +56,19 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
#if _LIBCPP_VECTORIZE_ALGORITHMS
+template <class _ValueType>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long long, 2>
+__reverse_vector(__simd_vector<long long, 2>& __cmp_res) {
+# if defined(_LIBCPP_BIG_ENDIAN)
+ __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
+# endif
+ return __cmp_res;
+}
+
template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 2>
__reverse_vector(__simd_vector<long, 2>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<long> == 2, "The __native_vector_size has to be 2");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
# endif
return __cmp_res;
@@ -70,7 +78,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 4>
__reverse_vector(__simd_vector<long, 4>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<long> == 4, "The __native_vector_size has to be 4");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
# endif
return __cmp_res;
@@ -80,7 +87,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 8>
__reverse_vector(__simd_vector<int, 8>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<int> == 8, "The __native_vector_size has to be 8");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
# endif
return __cmp_res;
@@ -90,7 +96,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 4>
__reverse_vector(__simd_vector<int, 4>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<int> == 4, "The __native_vector_size has to be 4");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
# endif
return __cmp_res;
@@ -100,7 +105,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 8>
__reverse_vector(__simd_vector<_ValueType, 8>& __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_ValueType> == 8, "The __native_vector_size has to be 8");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
# endif
return __cmp_res;
@@ -110,7 +114,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 16>
__reverse_vector(__simd_vector<_ValueType, 16> __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_ValueType> == 16, "The __native_vector_size has to be 16");
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
# endif
return __cmp_res;
@@ -120,7 +123,6 @@ template <class _ValueType>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 32>
__reverse_vector(__simd_vector<_ValueType, 32> __cmp_res) {
# if defined(_LIBCPP_BIG_ENDIAN)
- static_assert(__native_vector_size<_ValueType> == 32, "The __native_vector_size has to be 32");
__cmp_res = __builtin_shufflevector(
__cmp_res,
__cmp_res,
>From 42f64c8d8ebee7b314d28bdf724a2ed2b2ed68b0 Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Mon, 3 Jun 2024 18:30:36 +0000
Subject: [PATCH 5/6] Based on the suggestion, apply the variadic template
technique to reduce code
---
libcxx/include/__algorithm/mismatch.h | 139 ++++++--------------------
1 file changed, 33 insertions(+), 106 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 8519ef76140ed..bde3010b0c457 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -55,113 +55,34 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
}
#if _LIBCPP_VECTORIZE_ALGORITHMS
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long long, 2>
-__reverse_vector(__simd_vector<long long, 2>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 2>
-__reverse_vector(__simd_vector<long, 2>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 4>
-__reverse_vector(__simd_vector<long, 4>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 8>
-__reverse_vector(__simd_vector<int, 8>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 4>
-__reverse_vector(__simd_vector<int, 4>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
-# endif
- return __cmp_res;
+template <class _ValueType, size_t _Np>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long long, _Np>
+__reverse_vector(__simd_vector<long long, _Np> __cmp_res) {
+ return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
+ return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
+ }(make_index_sequence<_Np>{});
+}
+template <class _ValueType, size_t _Np>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long, _Np>
+__reverse_vector(__simd_vector<long, _Np> __cmp_res) {
+ return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
+ return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
+ }(make_index_sequence<_Np>{});
+}
+template <class _ValueType, size_t _Np>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<int, _Np>
+__reverse_vector(__simd_vector<int, _Np> __cmp_res) {
+ return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
+ return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
+ }(make_index_sequence<_Np>{});
+}
+template <class _ValueType, size_t _Np>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<_ValueType, _Np>
+__reverse_vector(__simd_vector<_ValueType, _Np> __cmp_res) {
+ return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
+ return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
+ }(make_index_sequence<_Np>{});
}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 8>
-__reverse_vector(__simd_vector<_ValueType, 8>& __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 16>
-__reverse_vector(__simd_vector<_ValueType, 16> __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-# endif
- return __cmp_res;
-}
-
-template <class _ValueType>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 32>
-__reverse_vector(__simd_vector<_ValueType, 32> __cmp_res) {
-# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = __builtin_shufflevector(
- __cmp_res,
- __cmp_res,
- 31,
- 30,
- 29,
- 28,
- 27,
- 26,
- 25,
- 24,
- 23,
- 22,
- 21,
- 20,
- 19,
- 18,
- 17,
- 16,
- 15,
- 14,
- 13,
- 12,
- 11,
- 10,
- 9,
- 8,
- 7,
- 6,
- 5,
- 4,
- 3,
- 2,
- 1,
- 0);
-# endif
- return __cmp_res;
-}
-
template <class _Iter>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -184,7 +105,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
for (size_t __i = 0; __i != __unroll_count; ++__i) {
auto __cmp_res = __lhs[__i] == __rhs[__i];
+# if defined(_LIBCPP_BIG_ENDIAN)
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+# endif
if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -198,7 +121,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+# if defined(_LIBCPP_BIG_ENDIAN)
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+# endif
if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -216,7 +141,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+# if defined(_LIBCPP_BIG_ENDIAN)
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+# endif
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
>From 8cddb8352696412011d1a513cb86b01af925044f Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Mon, 3 Jun 2024 18:43:05 +0000
Subject: [PATCH 6/6] fix formatting
---
libcxx/include/__algorithm/mismatch.h | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index bde3010b0c457..bdd3314ed1ec5 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -63,15 +63,13 @@ __reverse_vector(__simd_vector<long long, _Np> __cmp_res) {
}(make_index_sequence<_Np>{});
}
template <class _ValueType, size_t _Np>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long, _Np>
-__reverse_vector(__simd_vector<long, _Np> __cmp_res) {
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long, _Np> __reverse_vector(__simd_vector<long, _Np> __cmp_res) {
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
}(make_index_sequence<_Np>{});
}
template <class _ValueType, size_t _Np>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<int, _Np>
-__reverse_vector(__simd_vector<int, _Np> __cmp_res) {
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<int, _Np> __reverse_vector(__simd_vector<int, _Np> __cmp_res) {
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
}(make_index_sequence<_Np>{});
@@ -106,7 +104,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
for (size_t __i = 0; __i != __unroll_count; ++__i) {
auto __cmp_res = __lhs[__i] == __rhs[__i];
# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
# endif
if (!std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
@@ -122,7 +120,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
# endif
if (!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
@@ -142,9 +140,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
__first2 = __last2 - __vec_size;
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
# if defined(_LIBCPP_BIG_ENDIAN)
- __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
+ __cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
# endif
- auto __offset = std::__find_first_not_set(__cmp_res);
+ auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
More information about the libcxx-commits
mailing list