[libcxx-commits] [libcxx] Fix endianess for algorithm mismatch (PR #93082)

Wed May 22 11:07:54 PDT 2024

https://github.com/zibi2 created https://github.com/llvm/llvm-project/pull/93082

This PR will fix `std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp` test for big endian platrofrms such as z/OS.

>From fa56dba875e527e7ecc21dae5805cdafec238acf Mon Sep 17 00:00:00 2001
From: Zbigniew Sarbinowski <zibi at ca.ibm.com>
Date: Wed, 22 May 2024 18:11:27 +0000
Subject: [PATCH] Fix endianess for algorithm mismatch

---
 libcxx/include/__algorithm/mismatch.h | 47 ++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 632bec02406a4..a8219f0817a6c 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -56,6 +56,39 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
 
 #if _LIBCPP_VECTORIZE_ALGORITHMS
 
+template <class _Tp,
+          __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8>
+__reverse_vector(__simd_vector<_Tp, 8>& __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  static_assert(__native_vector_size<_Tp> == 8, "The __native_vector_size has to be 8");
+  __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+  return __cmp_res;
+}
+
+template <class _Tp,
+          __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16>
+__reverse_vector(__simd_vector<_Tp, 16> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  static_assert(__native_vector_size<_Tp> == 16, "The __native_vector_size has to be 16");
+  __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+  return __cmp_res;
+}
+
+template <class _Tp,
+          __enable_if_t<is_integral<_Tp>::value, int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32>
+__reverse_vector(__simd_vector<_Tp, 32> __cmp_res) {
+#if defined(_LIBCPP_BIG_ENDIAN)
+  static_assert(__native_vector_size<_Tp> == 32, "The __native_vector_size has to be 32");
+  __cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+#endif
+  return __cmp_res;
+}
+
 template <class _Iter>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
 __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -77,7 +110,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
       }
 
       for (size_t __i = 0; __i != __unroll_count; ++__i) {
-        if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
+        auto __cmp_res = __lhs[__i] == __rhs[__i];
+        __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+        if (!std::__all_of(__cmp_res)) {
           auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
           return {__first1 + __offset, __first2 + __offset};
         }
@@ -89,8 +124,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
 
     // check the remaining 0-3 vectors
     while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
-      if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
-          !std::__all_of(__cmp_res)) {
+      auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+      __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+      if (!std::__all_of(__cmp_res)) {
         auto __offset = std::__find_first_not_set(__cmp_res);
         return {__first1 + __offset, __first2 + __offset};
       }
@@ -106,8 +142,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
     if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
       __first1 = __last1 - __vec_size;
       __first2 = __last2 - __vec_size;
-      auto __offset =
-          std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+      auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
+      __cmp_res = __reverse_vector<_Tp>(__cmp_res);
+      auto __offset = std::__find_first_not_set(__cmp_res);
       return {__first1 + __offset, __first2 + __offset};
     } // else loop over the elements individually
   }