[libcxx-commits] [libcxx] [libc++] Vectorize mismatch (PR #73255)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Dec 23 05:05:37 PST 2023
https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/73255
>From 6a9f6de198bcc9f7da1be0e14c2bd448cf9c8831 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sat, 23 Dec 2023 12:07:10 +0100
Subject: [PATCH 1/2] [libc++][NFC] Refactor <experimental/simd> a bit to
simplify dependencies
---
libcxx/include/CMakeLists.txt | 2 -
libcxx/include/experimental/__simd/abi_tag.h | 55 -------------------
.../include/experimental/__simd/aligned_tag.h | 13 ++++-
.../include/experimental/__simd/declaration.h | 52 +++++++++++++++++-
.../__simd/internal_declaration.h | 41 --------------
libcxx/include/experimental/__simd/scalar.h | 2 +-
libcxx/include/experimental/__simd/simd.h | 2 -
.../include/experimental/__simd/simd_mask.h | 2 -
libcxx/include/experimental/__simd/traits.h | 15 +----
libcxx/include/experimental/__simd/vec_ext.h | 2 +-
libcxx/include/experimental/simd | 1 -
libcxx/include/module.modulemap.in | 2 -
12 files changed, 66 insertions(+), 123 deletions(-)
delete mode 100644 libcxx/include/experimental/__simd/abi_tag.h
delete mode 100644 libcxx/include/experimental/__simd/internal_declaration.h
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 746d5812fba048..0fe3ab44d2466e 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -914,10 +914,8 @@ set(files
expected
experimental/__config
experimental/__memory
- experimental/__simd/abi_tag.h
experimental/__simd/aligned_tag.h
experimental/__simd/declaration.h
- experimental/__simd/internal_declaration.h
experimental/__simd/reference.h
experimental/__simd/scalar.h
experimental/__simd/simd.h
diff --git a/libcxx/include/experimental/__simd/abi_tag.h b/libcxx/include/experimental/__simd/abi_tag.h
deleted file mode 100644
index cec5be65ce5c21..00000000000000
--- a/libcxx/include/experimental/__simd/abi_tag.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// -*- C++ -*-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP_EXPERIMENTAL___SIMD_ABI_TAG_H
-#define _LIBCPP_EXPERIMENTAL___SIMD_ABI_TAG_H
-
-#include <cstddef>
-#include <experimental/__config>
-#include <experimental/__simd/internal_declaration.h>
-
-#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
-
-_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
-inline namespace parallelism_v2 {
-namespace simd_abi {
-
-using scalar = __scalar;
-
-// TODO: make this platform dependent
-template <int _Np>
-using fixed_size = __vec_ext<_Np>;
-
-template <class _Tp>
-inline constexpr int max_fixed_size = 32;
-
-// TODO: make this platform dependent
-template <class _Tp>
-using compatible = __vec_ext<16 / sizeof(_Tp)>;
-
-// TODO: make this platform dependent
-template <class _Tp>
-using native = __vec_ext<_LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
-
-// TODO: make this platform dependent
-template <class _Tp, size_t _Np, class... _Abis>
-struct deduce {
- using type = fixed_size<_Np>;
-};
-
-// TODO: make this platform dependent
-template <class _Tp, size_t _Np, class... _Abis>
-using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
-
-} // namespace simd_abi
-} // namespace parallelism_v2
-_LIBCPP_END_NAMESPACE_EXPERIMENTAL
-
-#endif // _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
-#endif // _LIBCPP_EXPERIMENTAL___SIMD_ABI_TAG_H
diff --git a/libcxx/include/experimental/__simd/aligned_tag.h b/libcxx/include/experimental/__simd/aligned_tag.h
index d216a21c073f3a..edbb3b24931f5a 100644
--- a/libcxx/include/experimental/__simd/aligned_tag.h
+++ b/libcxx/include/experimental/__simd/aligned_tag.h
@@ -10,10 +10,10 @@
#ifndef _LIBCPP_EXPERIMENTAL___SIMD_ALIGNED_TAG_H
#define _LIBCPP_EXPERIMENTAL___SIMD_ALIGNED_TAG_H
-#include <__bit/bit_ceil.h>
#include <__memory/assume_aligned.h>
#include <cstddef>
#include <experimental/__config>
+#include <experimental/__simd/traits.h>
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
@@ -30,9 +30,12 @@ struct element_aligned_tag {
}
};
+template <>
+inline constexpr bool is_simd_flag_type_v<element_aligned_tag> = true;
+
struct vector_aligned_tag {
template <class _Tp, class _Up = typename _Tp::value_type>
- static constexpr size_t __alignment = std::__bit_ceil(sizeof(_Up) * _Tp::size());
+ static constexpr size_t __alignment = memory_alignment_v<_Tp, _Up>;
template <class _Tp, class _Up>
static _LIBCPP_HIDE_FROM_ABI constexpr _Up* __apply(_Up* __ptr) {
@@ -40,6 +43,9 @@ struct vector_aligned_tag {
}
};
+template <>
+inline constexpr bool is_simd_flag_type_v<vector_aligned_tag> = true;
+
template <size_t _Np>
struct overaligned_tag {
template <class _Tp, class _Up = typename _Tp::value_type>
@@ -51,6 +57,9 @@ struct overaligned_tag {
}
};
+template <size_t _Np>
+inline constexpr bool is_simd_flag_type_v<overaligned_tag<_Np>> = true;
+
inline constexpr element_aligned_tag element_aligned{};
inline constexpr vector_aligned_tag vector_aligned{};
diff --git a/libcxx/include/experimental/__simd/declaration.h b/libcxx/include/experimental/__simd/declaration.h
index 065faeaec3841f..7b45d035c27121 100644
--- a/libcxx/include/experimental/__simd/declaration.h
+++ b/libcxx/include/experimental/__simd/declaration.h
@@ -10,13 +10,63 @@
#ifndef _LIBCPP_EXPERIMENTAL___SIMD_DECLARATION_H
#define _LIBCPP_EXPERIMENTAL___SIMD_DECLARATION_H
+#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/abi_tag.h>
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
inline namespace parallelism_v2 {
+namespace simd_abi {
+template <int>
+struct __vec_ext;
+struct __scalar;
+
+using scalar = __scalar;
+
+// TODO: make this platform dependent
+template <int _Np>
+using fixed_size = __vec_ext<_Np>;
+
+template <class _Tp>
+inline constexpr int max_fixed_size = 32;
+
+// TODO: make this platform dependent
+template <class _Tp>
+using compatible = __vec_ext<16 / sizeof(_Tp)>;
+
+// TODO: make this platform dependent
+template <class _Tp>
+using native = __vec_ext<_LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
+
+// TODO: make this platform dependent
+template <class _Tp, size_t _Np, class... _Abis>
+struct deduce {
+ using type = fixed_size<_Np>;
+};
+
+// TODO: make this platform dependent
+template <class _Tp, size_t _Np, class... _Abis>
+using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
+
+} // namespace simd_abi
+
+template <class _Tp, class _Abi>
+struct __simd_storage;
+
+template <class _Tp, class _Abi>
+struct __mask_storage;
+
+template <class _Tp, class _Abi>
+struct __simd_operations;
+
+template <class _Tp, class _Abi>
+struct __mask_operations;
+
+struct element_aligned_tag;
+struct vector_aligned_tag;
+template <size_t>
+struct overaligned_tag;
template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
class simd;
diff --git a/libcxx/include/experimental/__simd/internal_declaration.h b/libcxx/include/experimental/__simd/internal_declaration.h
deleted file mode 100644
index 9ad1ad1ae3192f..00000000000000
--- a/libcxx/include/experimental/__simd/internal_declaration.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// -*- C++ -*-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP_EXPERIMENTAL___SIMD_INTERNAL_DECLARATION_H
-#define _LIBCPP_EXPERIMENTAL___SIMD_INTERNAL_DECLARATION_H
-
-#include <experimental/__config>
-
-#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
-
-_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
-inline namespace parallelism_v2 {
-namespace simd_abi {
-template <int>
-struct __vec_ext;
-struct __scalar;
-} // namespace simd_abi
-
-template <class _Tp, class _Abi>
-struct __simd_storage;
-
-template <class _Tp, class _Abi>
-struct __mask_storage;
-
-template <class _Tp, class _Abi>
-struct __simd_operations;
-
-template <class _Tp, class _Abi>
-struct __mask_operations;
-
-} // namespace parallelism_v2
-_LIBCPP_END_NAMESPACE_EXPERIMENTAL
-
-#endif // _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
-#endif // _LIBCPP_EXPERIMENTAL___SIMD_INTERNAL_DECLARATION_H
diff --git a/libcxx/include/experimental/__simd/scalar.h b/libcxx/include/experimental/__simd/scalar.h
index 53fa1c29f374ca..5eeff4c1e82a38 100644
--- a/libcxx/include/experimental/__simd/scalar.h
+++ b/libcxx/include/experimental/__simd/scalar.h
@@ -12,7 +12,7 @@
#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/internal_declaration.h>
+#include <experimental/__simd/declaration.h>
#include <experimental/__simd/traits.h>
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
diff --git a/libcxx/include/experimental/__simd/simd.h b/libcxx/include/experimental/__simd/simd.h
index ffb328eb345b1a..c345811fee7fc7 100644
--- a/libcxx/include/experimental/__simd/simd.h
+++ b/libcxx/include/experimental/__simd/simd.h
@@ -15,9 +15,7 @@
#include <__utility/forward.h>
#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/abi_tag.h>
#include <experimental/__simd/declaration.h>
-#include <experimental/__simd/internal_declaration.h>
#include <experimental/__simd/reference.h>
#include <experimental/__simd/traits.h>
#include <experimental/__simd/utility.h>
diff --git a/libcxx/include/experimental/__simd/simd_mask.h b/libcxx/include/experimental/__simd/simd_mask.h
index 325b8409e3b6d2..db03843b46e3ad 100644
--- a/libcxx/include/experimental/__simd/simd_mask.h
+++ b/libcxx/include/experimental/__simd/simd_mask.h
@@ -13,9 +13,7 @@
#include <__type_traits/is_same.h>
#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/abi_tag.h>
#include <experimental/__simd/declaration.h>
-#include <experimental/__simd/internal_declaration.h>
#include <experimental/__simd/reference.h>
#include <experimental/__simd/traits.h>
diff --git a/libcxx/include/experimental/__simd/traits.h b/libcxx/include/experimental/__simd/traits.h
index 9b4abe9d0c232e..ec25b4bfa7f95e 100644
--- a/libcxx/include/experimental/__simd/traits.h
+++ b/libcxx/include/experimental/__simd/traits.h
@@ -10,14 +10,12 @@
#ifndef _LIBCPP_EXPERIMENTAL___SIMD_TRAITS_H
#define _LIBCPP_EXPERIMENTAL___SIMD_TRAITS_H
+#include <__bit/bit_ceil.h>
#include <__type_traits/integral_constant.h>
#include <__type_traits/is_same.h>
#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/abi_tag.h>
-#include <experimental/__simd/aligned_tag.h>
#include <experimental/__simd/declaration.h>
-#include <experimental/__simd/internal_declaration.h>
#include <experimental/__simd/utility.h>
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
@@ -47,15 +45,6 @@ struct is_simd_mask : bool_constant<is_simd_mask_v<_Tp>> {};
template <class _Tp>
inline constexpr bool is_simd_flag_type_v = false;
-template <>
-inline constexpr bool is_simd_flag_type_v<element_aligned_tag> = true;
-
-template <>
-inline constexpr bool is_simd_flag_type_v<vector_aligned_tag> = true;
-
-template <size_t _Np>
-inline constexpr bool is_simd_flag_type_v<overaligned_tag<_Np>> = true;
-
template <class _Tp>
struct is_simd_flag_type : bool_constant<is_simd_flag_type_v<_Tp>> {};
@@ -71,7 +60,7 @@ inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
template <class _Tp,
class _Up = typename _Tp::value_type,
bool = (is_simd_v<_Tp> && __is_vectorizable_v<_Up>) || (is_simd_mask_v<_Tp> && is_same_v<_Up, bool>)>
-struct memory_alignment : integral_constant<size_t, vector_aligned_tag::__alignment<_Tp, _Up>> {};
+struct memory_alignment : integral_constant<size_t, std::__bit_ceil(sizeof(_Up) * _Tp::size())> {};
template <class _Tp, class _Up>
struct memory_alignment<_Tp, _Up, false> {};
diff --git a/libcxx/include/experimental/__simd/vec_ext.h b/libcxx/include/experimental/__simd/vec_ext.h
index 56a0b888104bfa..07ba032f493b1e 100644
--- a/libcxx/include/experimental/__simd/vec_ext.h
+++ b/libcxx/include/experimental/__simd/vec_ext.h
@@ -15,7 +15,7 @@
#include <__utility/integer_sequence.h>
#include <cstddef>
#include <experimental/__config>
-#include <experimental/__simd/internal_declaration.h>
+#include <experimental/__simd/declaration.h>
#include <experimental/__simd/traits.h>
#include <experimental/__simd/utility.h>
diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd
index 56858832857c17..adca9faa47bb06 100644
--- a/libcxx/include/experimental/simd
+++ b/libcxx/include/experimental/simd
@@ -78,7 +78,6 @@ inline namespace parallelism_v2 {
#endif
#include <experimental/__config>
-#include <experimental/__simd/abi_tag.h>
#include <experimental/__simd/aligned_tag.h>
#include <experimental/__simd/declaration.h>
#include <experimental/__simd/scalar.h>
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index a37e96205cf2e0..d10670d4faaffc 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -530,10 +530,8 @@ module std_experimental [system] {
export *
}
module simd {
- module abi_tag { private header "experimental/__simd/abi_tag.h" }
module aligned_tag { private header "experimental/__simd/aligned_tag.h" }
module declaration { private header "experimental/__simd/declaration.h" }
- module internal_declaration { private header "experimental/__simd/internal_declaration.h" }
module reference { private header "experimental/__simd/reference.h" }
module scalar { private header "experimental/__simd/scalar.h" }
module simd { private header "experimental/__simd/simd.h" }
>From 5251bb216104da9cf7cb69e850613e874163d4b5 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 8 Oct 2023 12:45:40 +0200
Subject: [PATCH 2/2] [libc++] Vectorize mismatch
---
.../benchmarks/algorithms/mismatch.bench.cpp | 31 ++
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__algorithm/mismatch.h | 158 +++++++++-
libcxx/include/__algorithm/vectorization.h | 78 +++++
libcxx/include/__bit/has_single_bit.h | 15 +-
libcxx/include/__utility/align_down.h | 31 ++
libcxx/include/experimental/__simd/avx512.h | 93 ++++++
.../include/experimental/__simd/declaration.h | 13 +
.../experimental/__simd/feature_traits.h | 298 ++++++++++++++++++
libcxx/include/experimental/__simd/simd.h | 73 +++++
.../include/experimental/__simd/simd_mask.h | 26 ++
libcxx/include/experimental/__simd/vec_ext.h | 33 ++
libcxx/src/memory_resource.cpp | 3 +-
.../mismatch/mismatch.pass.cpp | 77 ++---
14 files changed, 863 insertions(+), 67 deletions(-)
create mode 100644 libcxx/benchmarks/algorithms/mismatch.bench.cpp
create mode 100644 libcxx/include/__algorithm/vectorization.h
create mode 100644 libcxx/include/__utility/align_down.h
create mode 100644 libcxx/include/experimental/__simd/avx512.h
create mode 100644 libcxx/include/experimental/__simd/feature_traits.h
diff --git a/libcxx/benchmarks/algorithms/mismatch.bench.cpp b/libcxx/benchmarks/algorithms/mismatch.bench.cpp
new file mode 100644
index 00000000000000..3cce0c108ee1ec
--- /dev/null
+++ b/libcxx/benchmarks/algorithms/mismatch.bench.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <benchmark/benchmark.h>
+#include <random>
+#include <vector>
+
+// Benchmarks std::mismatch on two equal-length vectors that differ in exactly one randomly chosen position.
+// state.range() is the number of elements in each vector.
+template <class T>
+static void bm_mismatch(benchmark::State& state) {
+  std::vector<T> vec1(state.range(), '1');
+  std::vector<T> vec2(state.range(), '1');
+  std::mt19937_64 rng(std::random_device{}());
+
+  for (auto _ : state) {
+    // Plant a single difference, run the algorithm, then restore the element so that every iteration starts from
+    // two equal ranges again.
+    auto idx  = rng() % vec1.size();
+    vec1[idx] = '2';
+    benchmark::DoNotOptimize(vec1);
+    benchmark::DoNotOptimize(std::mismatch(vec1.begin(), vec1.end(), vec2.begin()));
+    vec1[idx] = '1';
+  }
+}
+BENCHMARK(bm_mismatch<char>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_mismatch<short>)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_mismatch<int>)->DenseRange(1, 8)->Range(16, 1 << 20);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 0fe3ab44d2466e..791d1386c0e141 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -229,6 +229,7 @@ set(files
__algorithm/unwrap_iter.h
__algorithm/unwrap_range.h
__algorithm/upper_bound.h
+ __algorithm/vectorization.h
__assert
__atomic/aliases.h
__atomic/atomic.h
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index d345b6048a7e9b..20e59e805d7f03 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -11,9 +11,19 @@
#define _LIBCPP___ALGORITHM_MISMATCH_H
#include <__algorithm/comp.h>
+#include <__algorithm/unwrap_iter.h>
+#include <__algorithm/vectorization.h>
#include <__config>
+#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_equality_comparable.h>
+#include <__utility/align_down.h>
+#include <__utility/move.h>
#include <__utility/pair.h>
+#include <experimental/__simd/feature_traits.h>
+#include <experimental/__simd/simd.h>
+#include <experimental/__simd/simd_mask.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -21,13 +31,151 @@
_LIBCPP_BEGIN_NAMESPACE_STD
+// Scalar fallback for mismatch: walks both ranges in lockstep while __pred holds for the projected elements and
+// returns the two positions at which it stopped (either the first failing pair, or __last1 and its counterpart).
+template <class _InIter1, class _Sent1, class _InIter2, class _Pred, class _Proj1, class _Proj2>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter1, _InIter2>
+__mismatch_loop(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) {
+  for (; __first1 != __last1; ++__first1, (void)++__first2) {
+    if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2)))
+      break;
+  }
+  return {std::move(__first1), std::move(__first2)};
+}
+
+#if _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
+// SIMD building blocks used by the pointer overload of std::__mismatch. Every member compares the elements through
+// __arithmetic_vec<_Tp>, i.e. as same-sized arithmetic values.
+template <class _Tp>
+struct __mismatch_vector_impl {
+  // Whether ranges of _Tp may be handled by this implementation: either _Tp is trivially equality comparable, has a
+  // size accepted by __fits_in_vector and is at least as aligned as its arithmetic stand-in, or _Tp is a
+  // floating-point type and the caller opted in through _VectorizeFloatingPoint.
+  template <bool _VectorizeFloatingPoint>
+  static constexpr bool __can_vectorize =
+      (__libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value && __fits_in_vector<_Tp> &&
+       alignof(_Tp) >= alignof(__get_arithmetic_type<_Tp>)) ||
+      (_VectorizeFloatingPoint && is_floating_point_v<_Tp>);
+
+  using __vec         = __arithmetic_vec<_Tp>;
+  using __mask_traits = experimental::__mask_traits<typename __vec::value_type, typename __vec::abi_type>;
+  static constexpr size_t __unroll_count = 4;
+
+  // Positions reached in both ranges. __matched tells the caller that the positions are final and can be returned as
+  // they are (a mismatch was located, or __loop consumed the whole range).
+  struct __result {
+    _Tp* __iter1;
+    _Tp* __iter2;
+    bool __matched;
+  };
+
+  // Returns the index of the first lane of __equal that is not set. __equal must contain at least one unset lane.
+  // This is a scalar scan, but it only runs once, after a mismatch is already known to exist.
+  template <class _Mask>
+  _LIBCPP_HIDE_FROM_ABI static size_t __first_unset_lane(const _Mask& __equal) {
+    size_t __lane = 0;
+    while (__equal[__lane])
+      ++__lane;
+    return __lane;
+  }
+
+  // Compares the elements between __first1 and the next boundary returned by __align_down with a single masked
+  // load. When masked loads are unavailable, or the range is too short, the inputs are returned untouched with
+  // __matched == false.
+  // NOTE(review): __align_down receives __vec::size() (a lane count) as a byte alignment; sizeof(__vec) may have been
+  // intended -- confirm before enabling the prologue in __mismatch.
+  _LIBCPP_HIDE_FROM_ABI static __result __prologue(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
+    if constexpr (__mask_traits::__has_maskload) {
+      auto __first_aligned = std::__align_down(__vec::size(), __first1);
+      auto __offset        = __first1 - __first_aligned;
+      auto __checked_size  = __vec::size() - __offset;
+      // The masked compare below covers __checked_size elements starting at __first1, so it may only run when the
+      // range holds at least that many elements; shorter ranges are left entirely to the caller.
+      if (static_cast<size_t>(__last1 - __first1) < __checked_size)
+        return {__first1, __first2, false};
+      auto __second_aligned = __first2 - __offset;
+      auto __mask           = __mask_traits::__mask_with_last_enabled(__checked_size);
+      __vec __lhs =
+          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first_aligned), __mask);
+      __vec __rhs =
+          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__second_aligned), __mask);
+      auto __res = __mask_traits::__mask_cmp_eq(__mask, __lhs, __rhs);
+      if ((__res.__get_data().__mask_ & __mask.__get_data().__mask_) != __mask.__get_data().__mask_) {
+        // A mismatching lane is an *enabled* lane whose equality bit is clear. The disabled lanes precede __first1,
+        // so they have to be masked out of the inverted result or they would be reported as the first mismatch.
+        auto __match_offset = experimental::find_first_set(decltype(__mask){
+            experimental::__from_storage,
+            {decltype(__res.__get_data().__mask_)(~__res.__get_data().__mask_ & __mask.__get_data().__mask_)}});
+        return {__first_aligned + __match_offset, __second_aligned + __match_offset, true};
+      }
+      return {__first_aligned + __vec::size(), __second_aligned + __vec::size(), false};
+    } else {
+      return {__first1, __first2, false};
+    }
+  }
+
+  // Compares __unroll_count vectors per iteration for as long as that many elements remain. Returns the first
+  // mismatch with __matched == true, or otherwise the position reached, with __matched == true only if that position
+  // is __last1.
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static __result __loop(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
+    while (__last1 - __first1 >= __unroll_count * __vec::size()) {
+      __vec __lhs[__unroll_count];
+      __vec __rhs[__unroll_count];
+
+      // Issue all loads before the first compare.
+      for (size_t __i = 0; __i != __unroll_count; ++__i) {
+        __lhs[__i] = std::__load_as_arithmetic(__first1 + __i * __vec::size());
+        __rhs[__i] = std::__load_as_arithmetic(__first2 + __i * __vec::size());
+      }
+
+      for (size_t __i = 0; __i != __unroll_count; ++__i) {
+        if (auto __res = __lhs[__i] == __rhs[__i]; !experimental::all_of(__res)) {
+          // __res has a bit set for every *equal* lane, so the mismatch is the first lane that is not set.
+          auto __offset = __i * __vec::size() + __first_unset_lane(__res);
+          return {__first1 + __offset, __first2 + __offset, true};
+        }
+      }
+
+      __first1 += __unroll_count * __vec::size();
+      __first2 += __unroll_count * __vec::size();
+    }
+    return {__first1, __first2, __first1 == __last1};
+  }
+
+  // Finishes the elements that __loop left over and returns the final pair of positions.
+  _LIBCPP_HIDE_FROM_ABI static pair<_Tp*, _Tp*> __epilogue(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
+    if constexpr (__mask_traits::__has_maskload) {
+      // __loop only consumes whole groups of __unroll_count vectors, so full vectors can still be left over. Compare
+      // those one at a time; the masked load below covers at most one vector.
+      while (static_cast<size_t>(__last1 - __first1) >= __vec::size()) {
+        auto __full_res = std::__load_as_arithmetic(__first1) == std::__load_as_arithmetic(__first2);
+        if (!experimental::all_of(__full_res)) {
+          auto __offset = __first_unset_lane(__full_res);
+          return {__first1 + __offset, __first2 + __offset};
+        }
+        __first1 += __vec::size();
+        __first2 += __vec::size();
+      }
+
+      auto __size = __last1 - __first1;
+      auto __mask = __mask_traits::__mask_with_first_enabled(__size);
+      __vec __lhs =
+          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first1), __mask);
+      __vec __rhs =
+          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first2), __mask);
+      auto __res = __mask_traits::__mask_cmp_eq(__mask, __lhs, __rhs);
+      // Same test as in __prologue. Comparing (__res | ~__mask) against an all-ones mask would go through integer
+      // promotion and never compare equal for 8- and 16-bit mask types.
+      if ((__res.__get_data().__mask_ & __mask.__get_data().__mask_) != __mask.__get_data().__mask_) {
+        // The enabled lanes are the leading ones, so the first unset lane is the first mismatching element.
+        auto __offset = __first_unset_lane(__res);
+        return {__first1 + __offset, __first2 + __offset};
+      }
+      return {__first1 + __size, __first2 + __size};
+    } else {
+      return std::__mismatch_loop(__first1, __last1, __first2, __equal_to(), __identity(), __identity());
+    }
+  }
+};
+#endif // _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
+
+// Generic overload: performs no vectorization and hands every argument to the scalar loop unchanged.
+template <class _InIter1, class _Sent1, class _InIter2, class _Pred, class _Proj1, class _Proj2>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter1, _InIter2>
+__mismatch(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) {
+  pair<_InIter1, _InIter2> __positions =
+      std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
+  return __positions;
+}
+
+#if _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS
+// Vectorized overload for raw pointers. It is selected only when the predicate desugars to equality, both
+// projections are identities and __mismatch_vector_impl<_Tp> reports that _Tp can be vectorized. During constant
+// evaluation it falls back to the scalar loop.
+template <
+    class _Tp,
+    class _Pred,
+    class _Proj1,
+    class _Proj2,
+    enable_if_t<
+        __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value && __is_identity<_Proj1>::value &&
+            __is_identity<_Proj2>::value &&
+            __mismatch_vector_impl<_Tp>::template __can_vectorize<_LIBCPP_VECTORIZE_FLOATING_POINT_CLASSIC_ALGORITHMS>,
+        int> = 0>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI inline constexpr pair<_Tp*, _Tp*>
+__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) {
+  if (__libcpp_is_constant_evaluated())
+    return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);
+
+  using __impl = __mismatch_vector_impl<_Tp>;
+
+  // The prologue is currently disabled:
+  // auto [__piter1, __piter2, __pmatch] = __impl::__prologue(__first1, __last1, __first2);
+  // if (__pmatch)
+  //   return {__piter1, __piter2};
+
+  auto [__iter1, __iter2, __matched] = __impl::__loop(__first1, __last1, __first2);
+  if (__matched)
+    return {__iter1, __iter2};
+
+  // Continue from where the vector loop stopped. Restarting at __first1/__first2 would re-examine the whole range
+  // (and, with masked loads, hand the epilogue far more elements than it is written for).
+  return __impl::__epilogue(__iter1, __last1, __iter2);
+}
+#endif // _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS
+
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
-_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2>
mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
- for (; __first1 != __last1; ++__first1, (void)++__first2)
- if (!__pred(*__first1, *__first2))
- break;
- return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
+ __identity __proj;
+ auto __res = std::__mismatch(
+ std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj);
+ return std::make_pair(std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second));
}
template <class _InputIterator1, class _InputIterator2>
diff --git a/libcxx/include/__algorithm/vectorization.h b/libcxx/include/__algorithm/vectorization.h
new file mode 100644
index 00000000000000..f2b139ff4a1ffe
--- /dev/null
+++ b/libcxx/include/__algorithm/vectorization.h
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ALGORITHM_VECTORIZATION_H
+#define _LIBCPP___ALGORITHM_VECTORIZATION_H
+
+#include <__config>
+#include <__type_traits/is_floating_point.h>
+#include <__utility/integer_sequence.h>
+#include <experimental/__simd/simd.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
+# define _LIBCPP_CAN_VECTORIZE_ALGORIHTMS 1
+#else
+# define _LIBCPP_CAN_VECTORIZE_ALGORIHTMS 0
+#endif
+
+#if _LIBCPP_CAN_VECTORIZE_ALGORIHTMS && !defined(__OPTIMIZE_SIZE__)
+# define _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS 1
+#else
+# define _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS 0
+#endif
+
+#if _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS && defined(__FAST_MATH__)
+# define _LIBCPP_VECTORIZE_FLOATING_POINT_CLASSIC_ALGORITHMS 1
+#else
+# define _LIBCPP_VECTORIZE_FLOATING_POINT_CLASSIC_ALGORITHMS 0
+#endif
+
+#if _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// True when sizeof(_Tp) is 1, 2, 4 or 8, i.e. when __get_arithmetic_type_impl has a same-sized integer for _Tp.
+// Declared `inline constexpr` like the other variable templates in this code base, so that it has a single
+// definition across translation units and is usable in constant expressions.
+template <class _Tp>
+inline constexpr bool __fits_in_vector =
+    sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8;
+
+// Returns a value-initialized object whose type is the arithmetic stand-in for _Tp: _Tp itself for floating-point
+// types, otherwise the unsigned integer type of the same size (1, 2, 4 or 8 bytes). Only the return type is used,
+// through __get_arithmetic_type. Any other size is rejected at compile time.
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr auto __get_arithmetic_type_impl() {
+  if constexpr (is_floating_point_v<_Tp>)
+    return _Tp{};
+  else if constexpr (constexpr auto __sz = sizeof(_Tp); __sz == 1)
+    return uint8_t{};
+  else if constexpr (__sz == 2)
+    return uint16_t{};
+  else if constexpr (__sz == 4)
+    return uint32_t{};
+  else if constexpr (__sz == 8)
+    return uint64_t{};
+  else
+    // The condition has to depend on _Tp: before C++23 a literal `false` makes the template ill-formed even when
+    // this branch is discarded, and compilers are allowed to reject it without any instantiation.
+    static_assert(sizeof(_Tp) == 0, "unexpected sizeof type");
+}
+
+template <class _Tp>
+using __get_arithmetic_type = decltype(__get_arithmetic_type_impl<_Tp>());
+
+template <class _Tp>
+using __arithmetic_vec = experimental::native_simd<__get_arithmetic_type<_Tp>>;
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI __arithmetic_vec<_Tp> __load_as_arithmetic(_Tp* __values) { // builds one __arithmetic_vec<_Tp> from the memory at __values, viewed as __get_arithmetic_type<_Tp> elements
+ return {reinterpret_cast<__get_arithmetic_type<_Tp>*>(__values), 0}; // NOTE(review): the second argument (0) goes to a simd constructor that is not visible here; confirm its meaning against simd.h
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
+
+#endif // _LIBCPP___ALGORITHM_VECTORIZATION_H
diff --git a/libcxx/include/__bit/has_single_bit.h b/libcxx/include/__bit/has_single_bit.h
index a4e178060a73a3..bc75158206829c 100644
--- a/libcxx/include/__bit/has_single_bit.h
+++ b/libcxx/include/__bit/has_single_bit.h
@@ -19,19 +19,24 @@
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
-#if _LIBCPP_STD_VER >= 20
-
_LIBCPP_BEGIN_NAMESPACE_STD
-template <__libcpp_unsigned_integer _Tp>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept {
+template <class _Tp>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr bool __has_single_bit(_Tp __t) noexcept {
return __t != 0 && (((__t & (__t - 1)) == 0));
}
-_LIBCPP_END_NAMESPACE_STD
+#if _LIBCPP_STD_VER >= 20
+
+template <__libcpp_unsigned_integer _Tp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept {
+ return std::__has_single_bit(__t);
+}
#endif // _LIBCPP_STD_VER >= 20
+_LIBCPP_END_NAMESPACE_STD
+
_LIBCPP_POP_MACROS
#endif // _LIBCPP___BIT_HAS_SINGLE_BIT_H
diff --git a/libcxx/include/__utility/align_down.h b/libcxx/include/__utility/align_down.h
new file mode 100644
index 00000000000000..d0bbfbb568ee20
--- /dev/null
+++ b/libcxx/include/__utility/align_down.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_ALIGN_DOWN_H
+#define _LIBCPP___UTILITY_ALIGN_DOWN_H
+
+#include <__config>
+#include <cstddef>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// Rounds __ptr down to the closest address that is a multiple of __align and returns it as a _Tp*.
+// __align must be a power of two (the rounding is done with a bit mask) and at least alignof(_Tp); both conditions
+// are checked by assertions.
+template <class _Tp>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI inline _Tp* __align_down(size_t __align, _Tp* __ptr) {
+  _LIBCPP_ASSERT_UNCATEGORIZED(
+      __align >= alignof(_Tp), "Alignment has to be at least as large as the required alignment");
+  _LIBCPP_ASSERT_UNCATEGORIZED((__align & (__align - 1)) == 0, "Alignment has to be a power of two");
+  // Build the mask in uintptr_t so that no address bits are lost if size_t is narrower than uintptr_t.
+  return reinterpret_cast<_Tp*>(reinterpret_cast<uintptr_t>(__ptr) & ~(static_cast<uintptr_t>(__align) - 1));
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_ALIGN_DOWN_H
diff --git a/libcxx/include/experimental/__simd/avx512.h b/libcxx/include/experimental/__simd/avx512.h
new file mode 100644
index 00000000000000..f8875b8aa8b206
--- /dev/null
+++ b/libcxx/include/experimental/__simd/avx512.h
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_EXPERIMENTAL___SIMD_AVX512_H
+#define _LIBCPP_EXPERIMENTAL___SIMD_AVX512_H
+
+#include <__bit/bit_ceil.h>
+#include <experimental/__config>
+#include <experimental/__simd/declaration.h>
+#include <experimental/__simd/vec_ext.h>
+
+#if __has_include(<immintrin.h>)
+# include <immintrin.h>
+#endif
+
+#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) && defined(__AVX512F__)
+
+_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
+inline namespace parallelism_v2 {
+namespace simd_abi {
+template <int _Np>
+struct __avx512 {
+ static constexpr size_t __simd_size = _Np;
+};
+
+template <class _Tp>
+inline constexpr bool __is_avx512_v = false;
+
+template <int _Np>
+inline constexpr bool __is_avx512_v<__avx512<_Np>> = true;
+} // namespace simd_abi
+
+template <int _Np>
+inline constexpr bool is_abi_tag_v<simd_abi::__avx512<_Np>> = _Np > 0 && _Np <= 64;
+
+template <class _Tp, int _Np>
+struct __simd_storage<_Tp, simd_abi::__avx512<_Np>> : __simd_storage<_Tp, simd_abi::__vec_ext<_Np>> {};
+
+// Mask storage for the AVX-512 ABI: one bit per lane, held in the smallest __mmaskN type that has at least _Np bits.
+template <class _Tp, int _Np>
+struct __mask_storage<_Tp, simd_abi::__avx512<_Np>> {
+  // Returns a value of the mask type to use for _Np lanes; only the return type is of interest.
+  _LIBCPP_HIDE_FROM_ABI static constexpr auto __get_mask_t() {
+    if constexpr (_Np <= 8)
+      return __mmask8{};
+    else if constexpr (_Np <= 16)
+      return __mmask16{};
+    else if constexpr (_Np <= 32)
+      return __mmask32{};
+    else if constexpr (_Np <= 64)
+      return __mmask64{};
+    else
+      static_assert(_Np == -1, "Unexpected size");
+  }
+  decltype(__get_mask_t()) __mask_;
+
+  // Returns the single-bit mask for lane __index. The shift is done in 64 bits: shifting the int literal 1 is
+  // undefined for lanes 32 and above and sign-extends for lane 31, which breaks __mmask64.
+  _LIBCPP_HIDE_FROM_ABI static constexpr unsigned long long __lane_bit(size_t __index) noexcept {
+    return 1ull << __index;
+  }
+
+  // Reads the bit of lane __index.
+  _LIBCPP_HIDE_FROM_ABI bool __get(size_t __index) const noexcept { return (__mask_ & __lane_bit(__index)) != 0; }
+
+  // Sets (when __value is true) or clears the bit of lane __index.
+  _LIBCPP_HIDE_FROM_ABI void __set(size_t __index, bool __value) noexcept {
+    if (__value)
+      __mask_ |= __lane_bit(__index);
+    else
+      __mask_ &= ~__lane_bit(__index);
+  }
+};
+
+template <class _Tp, int _Np>
+struct __simd_operations<_Tp, simd_abi::__avx512<_Np>> : __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> {};
+
+// Mask operations for the AVX-512 ABI, working directly on the bit mask held by __mask_storage.
+template <class _Tp, int _Np>
+struct __mask_operations<_Tp, simd_abi::__avx512<_Np>> {
+  using _MaskStorage = __mask_storage<_Tp, simd_abi::__avx512<_Np>>;
+
+  // Returns a mask with every bit set when __v is true and every bit clear otherwise.
+  // The all-ones value is derived from the integer type of __mask_: numeric_limits is not specialized for
+  // _MaskStorage itself, so numeric_limits<_MaskStorage>::max() would yield a value-initialized (all-zero) mask.
+  // NOTE(review): when _Np is smaller than the width of the mask type, the bits above lane _Np - 1 are set as well;
+  // confirm that this is what users of all_of expect.
+  _LIBCPP_HIDE_FROM_ABI static _MaskStorage __broadcast(bool __v) noexcept {
+    using _MaskT = decltype(_MaskStorage::__mask_);
+    if (__v)
+      return {static_cast<_MaskT>(~_MaskT{0})};
+    else
+      return {0};
+  }
+
+  // True when __mask equals the mask produced by __broadcast(true).
+  _LIBCPP_HIDE_FROM_ABI static bool all_of(_MaskStorage __mask) noexcept {
+    return __mask.__mask_ == __broadcast(true).__mask_;
+  }
+};
+} // namespace parallelism_v2
+
+_LIBCPP_END_NAMESPACE_EXPERIMENTAL
+
+#endif // _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL) && defined(__AVX512F__)
+
+#endif // _LIBCPP_EXPERIMENTAL___SIMD_AVX512_H
diff --git a/libcxx/include/experimental/__simd/declaration.h b/libcxx/include/experimental/__simd/declaration.h
index 7b45d035c27121..aa87d75738044d 100644
--- a/libcxx/include/experimental/__simd/declaration.h
+++ b/libcxx/include/experimental/__simd/declaration.h
@@ -18,6 +18,11 @@
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
inline namespace parallelism_v2 {
namespace simd_abi {
+#ifdef __AVX512F__
+template <int>
+struct __avx512;
+#endif
+
template <int>
struct __vec_ext;
struct __scalar;
@@ -36,8 +41,13 @@ template <class _Tp>
using compatible = __vec_ext<16 / sizeof(_Tp)>;
// TODO: make this platform dependent
+#ifdef __AVX512F__
+template <class _Tp>
+using native = __avx512<64 / sizeof(_Tp)>; // With AVX512F the native ABI holds 64 bytes' worth of _Tp elements.
+#else
template <class _Tp>
using native = __vec_ext<_LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
+#endif
// TODO: make this platform dependent
template <class _Tp, size_t _Np, class... _Abis>
@@ -51,6 +61,9 @@ using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi
+struct __from_storage_t {}; // Empty tag type: selects the simd/simd_mask constructors that adopt an already-built storage object.
+inline constexpr __from_storage_t __from_storage; // The tag value passed as first argument to those constructors.
+
template <class _Tp, class _Abi>
struct __simd_storage;
diff --git a/libcxx/include/experimental/__simd/feature_traits.h b/libcxx/include/experimental/__simd/feature_traits.h
new file mode 100644
index 00000000000000..6d96bf3856bde1
--- /dev/null
+++ b/libcxx/include/experimental/__simd/feature_traits.h
@@ -0,0 +1,298 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_EXPERIMENTAL___SIMD_FEATURE_TRAITS_H
+#define _LIBCPP_EXPERIMENTAL___SIMD_FEATURE_TRAITS_H
+
+#include <__bit/has_single_bit.h>
+#include <__config>
+#include <__memory/assume_aligned.h>
+#include <experimental/__simd/declaration.h>
+#include <experimental/__simd/vec_ext.h>
+
+#ifdef __AVX512F__
+# include <immintrin.h>
+#endif
+
+// The intrinsics cannot be portably qualified. This isn't super problematic, since we're only dealing with builtin
+// types anyways.
+// NOLINTBEGIN(libcpp-robust-against-adl)
+
+#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
+
+_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
+inline namespace parallelism_v2 {
+
+template <class _Tp, class _Abi, class = void>
+struct __mask_traits { // Fallback for every type/ABI pair without a dedicated specialization: no masked operations.
+ static constexpr bool __has_maskload = false; // No masked load is offered.
+ static constexpr bool __has_maskstore = false; // No masked store is offered.
+};
+
+// Returns __base_pattern with only its __count least significant bits kept.
+//
+// __base_pattern is expected to be a run of W consecutive ones starting at bit 0 (0x3, 0xF, 0xFF, ...), where W is
+// its bit width. The shift is taken relative to W rather than to 64; shifting relative to 64 yields 0 for every
+// pattern narrower than 64 bits. __count == 0 returns 0 and __count >= W returns the whole pattern, which also
+// avoids shifting by the full width of the type (undefined behaviour).
+template <uint64_t __base_pattern>
+_LIBCPP_HIDE_FROM_ABI uint64_t __set_least_significant_bits(size_t __count) noexcept {
+  // Number of bits needed to represent __base_pattern.
+  constexpr size_t __width = [] {
+    size_t __w = 0;
+    for (uint64_t __p = __base_pattern; __p != 0; __p >>= 1)
+      ++__w;
+    return __w;
+  }();
+
+  if (__count == 0)
+    return 0;
+  if (__count >= __width)
+    return __base_pattern;
+  return __base_pattern >> (__width - __count);
+}
+
+// Returns __base_pattern with only its __count most significant bits kept.
+//
+// __base_pattern is expected to be a run of W consecutive ones starting at bit 0 (0x3, 0xF, 0xFF, ...), where W is
+// its bit width. The shift is taken relative to W rather than to 64; shifting relative to 64 yields 0 for every
+// pattern narrower than 64 bits. __count == 0 returns 0 and __count >= W returns the whole pattern, which also
+// avoids shifting by the full width of the type (undefined behaviour).
+template <uint64_t __base_pattern>
+_LIBCPP_HIDE_FROM_ABI uint64_t __set_most_significant_bits(size_t __count) noexcept {
+  // Number of bits needed to represent __base_pattern.
+  constexpr size_t __width = [] {
+    size_t __w = 0;
+    for (uint64_t __p = __base_pattern; __p != 0; __p >>= 1)
+      ++__w;
+    return __w;
+  }();
+
+  if (__count == 0)
+    return 0;
+  if (__count >= __width)
+    return __base_pattern;
+  // Push the pattern up by the number of bits to drop, then cut off whatever left the pattern's range.
+  return (__base_pattern << (__width - __count)) & __base_pattern;
+}
+
+# ifdef __AVX512F__
+
+// Masked-operation traits for integral element types under the AVX-512 ABI.
+//
+// With AVX512VL enabled this maps masked load, masked store and masked compare onto the matching
+// _mm{,256,512}_mask* intrinsics, selected by element size and element count. Without it, the traits only report
+// that no masked load/store exists. In every mask, bit i corresponds to element i, which is the convention used by
+// the intrinsics and by __mask_storage.
+//
+// The element count is an int so that the specialization can be deduced against simd_abi::__avx512<int>: if the
+// parameter type differs from the one __avx512 declares, deduction fails and the specialization is never selected.
+template <class _Tp, int _Np>
+struct __mask_traits<_Tp, simd_abi::__avx512<_Np>, enable_if_t<is_integral_v<_Tp>>> {
+private:
+  static constexpr size_t __element_count = static_cast<size_t>(_Np);
+  static constexpr size_t __element_size  = sizeof(_Tp);
+  static constexpr size_t __vector_size   = __element_count * __element_size;
+
+  using __simd_t = simd<_Tp, simd_abi::__avx512<_Np>>;
+  using __mask_t = simd_mask<_Tp, simd_abi::__avx512<_Np>>;
+
+  using __storage_t [[__gnu__::__vector_size__(_Np * sizeof(_Tp))]] = _Tp;
+
+  // A bit mask with the bit of every element set.
+  static constexpr uint64_t __all_bits =
+      __element_count >= 64 ? ~static_cast<uint64_t>(0) : (static_cast<uint64_t>(1) << __element_count) - 1;
+
+  // The masked intrinsics for 8- and 16-bit elements require AVX512BW on top of AVX512F/AVX512VL.
+# ifdef __AVX512BW__
+  static constexpr bool __has_byte_word_ops = true;
+# else
+  static constexpr bool __has_byte_word_ops = false;
+# endif
+
+  // Wraps the low bits of __bits into a simd_mask, narrowing to the __mmaskN type used by the mask storage.
+  static _LIBCPP_HIDE_FROM_ABI __mask_t __mask_from_bits(uint64_t __bits) noexcept {
+    using __raw_mask_t = decltype(__mask_t().__get_data().__mask_);
+    return {__from_storage, {static_cast<__raw_mask_t>(__bits)}};
+  }
+
+public:
+# ifdef __AVX512VL__
+  // Masked operations exist only if the whole vector is exactly one 128-, 256- or 512-bit register and the element
+  // size has a matching intrinsic. Every size/count combination that passes this check is handled below, so the
+  // "Unexpected size" assertions are unreachable.
+  static constexpr bool __has_maskload =
+      (__vector_size == 16 || __vector_size == 32 || __vector_size == 64) &&
+      (__element_size == 4 || __element_size == 8 ||
+       ((__element_size == 1 || __element_size == 2) && __has_byte_word_ops));
+  static constexpr bool __has_maskstore = __has_maskload;
+
+  // Loads the elements selected by __mask_wrapped from __ptr, which needs no particular alignment. The zero-masking
+  // (maskz) intrinsics are used, so elements that are not selected are zero.
+  // Returns a default-constructed simd if __has_maskload is false.
+  static _LIBCPP_HIDE_FROM_ABI __simd_t __maskload_unaligned(const _Tp* __ptr, __mask_t __mask_wrapped) {
+    if constexpr (!__has_maskload) {
+      return {};
+    } else {
+      __storage_t __data = [&] {
+        auto __mask = __mask_wrapped.__get_data().__mask_;
+
+        if constexpr (__element_size == 1) {
+          if constexpr (__element_count == 16) {
+            return _mm_maskz_loadu_epi8(__mask, __ptr);
+          } else if constexpr (__element_count == 32) {
+            return _mm256_maskz_loadu_epi8(__mask, __ptr);
+          } else if constexpr (__element_count == 64) {
+            return _mm512_maskz_loadu_epi8(__mask, __ptr);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 2) {
+          if constexpr (__element_count == 8) {
+            return _mm_maskz_loadu_epi16(__mask, __ptr);
+          } else if constexpr (__element_count == 16) {
+            return _mm256_maskz_loadu_epi16(__mask, __ptr);
+          } else if constexpr (__element_count == 32) {
+            return _mm512_maskz_loadu_epi16(__mask, __ptr);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 4) {
+          if constexpr (__element_count == 4) {
+            return _mm_maskz_loadu_epi32(__mask, __ptr);
+          } else if constexpr (__element_count == 8) {
+            return _mm256_maskz_loadu_epi32(__mask, __ptr);
+          } else if constexpr (__element_count == 16) {
+            return _mm512_maskz_loadu_epi32(__mask, __ptr);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 8) {
+          if constexpr (__element_count == 2) {
+            return _mm_maskz_loadu_epi64(__mask, __ptr);
+          } else if constexpr (__element_count == 4) {
+            return _mm256_maskz_loadu_epi64(__mask, __ptr);
+          } else if constexpr (__element_count == 8) {
+            return _mm512_maskz_loadu_epi64(__mask, __ptr);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      }();
+      return {__from_storage, {__data}};
+    }
+  }
+
+  // Stores the elements of __data_wrapped selected by __mask_wrapped to __ptr_raw; memory belonging to elements
+  // that are not selected is left untouched. Does nothing if __has_maskstore is false.
+  //
+  // __ptr_raw is assumed to be aligned to the size of the whole vector. The 32- and 64-bit branches rely on that by
+  // using the aligned store intrinsics; the 8- and 16-bit branches use the storeu forms.
+  //
+  // The destination is a pointer to non-const, since it is written through, and the raw mask and vector are
+  // unwrapped from their storage structs before they are handed to the intrinsics.
+  static _LIBCPP_HIDE_FROM_ABI void __maskstore(_Tp* __ptr_raw, __simd_t __data_wrapped, __mask_t __mask_wrapped) {
+    if constexpr (!__has_maskstore) {
+      return;
+    } else {
+      auto __mask = __mask_wrapped.__get_data().__mask_;
+      auto __data = __data_wrapped.__get_data().__data;
+      auto __ptr  = std::__assume_aligned<sizeof(__storage_t)>(__ptr_raw);
+
+      if constexpr (__element_size == 1) {
+        if constexpr (__element_count == 16) {
+          _mm_mask_storeu_epi8(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 32) {
+          _mm256_mask_storeu_epi8(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 64) {
+          _mm512_mask_storeu_epi8(__ptr, __mask, __data);
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      } else if constexpr (__element_size == 2) {
+        if constexpr (__element_count == 8) {
+          _mm_mask_storeu_epi16(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 16) {
+          _mm256_mask_storeu_epi16(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 32) {
+          _mm512_mask_storeu_epi16(__ptr, __mask, __data);
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      } else if constexpr (__element_size == 4) {
+        if constexpr (__element_count == 4) {
+          _mm_mask_store_epi32(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 8) {
+          _mm256_mask_store_epi32(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 16) {
+          _mm512_mask_store_epi32(__ptr, __mask, __data);
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      } else if constexpr (__element_size == 8) {
+        if constexpr (__element_count == 2) {
+          _mm_mask_store_epi64(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 4) {
+          _mm256_mask_store_epi64(__ptr, __mask, __data);
+        } else if constexpr (__element_count == 8) {
+          _mm512_mask_store_epi64(__ptr, __mask, __data);
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      } else {
+        static_assert(_Np == -1, "Unexpected size");
+      }
+    }
+  }
+
+  // Returns a mask in which the first __n elements (elements 0 .. __n - 1, i.e. the __n lowest bits) are enabled.
+  // __n == 0 gives an empty mask; __n >= the element count enables every element.
+  static _LIBCPP_HIDE_FROM_ABI __mask_t __mask_with_first_enabled(size_t __n) noexcept {
+    if (__n >= __element_count)
+      return __mask_from_bits(__all_bits);
+    return __mask_from_bits((static_cast<uint64_t>(1) << __n) - 1);
+  }
+
+  // Returns a mask in which the last __n elements (the __n highest element bits) are enabled.
+  // __n == 0 gives an empty mask; __n >= the element count enables every element.
+  static _LIBCPP_HIDE_FROM_ABI __mask_t __mask_with_last_enabled(size_t __n) noexcept {
+    if (__n == 0)
+      return __mask_from_bits(0);
+    if (__n >= __element_count)
+      return __mask_from_bits(__all_bits);
+    // Clear the bits of the leading (__element_count - __n) elements; that count is in [1, 63] here.
+    const uint64_t __leading = (static_cast<uint64_t>(1) << (__element_count - __n)) - 1;
+    return __mask_from_bits(__all_bits & ~__leading);
+  }
+
+  // Compares __lhs_wrapped and __rhs_wrapped element-wise using the _MM_CMPINT_* predicate __comparator. The
+  // result bit of an element is set only if that element is also selected in __mask_wrapped.
+  // Returns a default-constructed mask if __has_maskstore is false.
+  template <int __comparator>
+  static _LIBCPP_HIDE_FROM_ABI __mask_t
+  __mask_cmp_mask(__mask_t __mask_wrapped, __simd_t __lhs_wrapped, __simd_t __rhs_wrapped) {
+    if constexpr (!__has_maskstore) {
+      return {};
+    } else {
+      auto __ret = [&] {
+        auto __mask = __mask_wrapped.__get_data().__mask_;
+        auto __lhs  = __lhs_wrapped.__get_data().__data;
+        auto __rhs  = __rhs_wrapped.__get_data().__data;
+
+        if constexpr (__element_size == 1) {
+          if constexpr (__element_count == 16) {
+            return _mm_mask_cmp_epi8_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 32) {
+            return _mm256_mask_cmp_epi8_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 64) {
+            return _mm512_mask_cmp_epi8_mask(__mask, __lhs, __rhs, __comparator);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 2) {
+          if constexpr (__element_count == 8) {
+            return _mm_mask_cmp_epi16_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 16) {
+            return _mm256_mask_cmp_epi16_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 32) {
+            return _mm512_mask_cmp_epi16_mask(__mask, __lhs, __rhs, __comparator);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 4) {
+          if constexpr (__element_count == 4) {
+            return _mm_mask_cmp_epi32_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 8) {
+            return _mm256_mask_cmp_epi32_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 16) {
+            return _mm512_mask_cmp_epi32_mask(__mask, __lhs, __rhs, __comparator);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else if constexpr (__element_size == 8) {
+          if constexpr (__element_count == 2) {
+            return _mm_mask_cmp_epi64_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 4) {
+            return _mm256_mask_cmp_epi64_mask(__mask, __lhs, __rhs, __comparator);
+          } else if constexpr (__element_count == 8) {
+            return _mm512_mask_cmp_epi64_mask(__mask, __lhs, __rhs, __comparator);
+          } else {
+            static_assert(_Np == -1, "Unexpected size");
+          }
+        } else {
+          static_assert(_Np == -1, "Unexpected size");
+        }
+      }();
+      return {__from_storage, {__ret}};
+    }
+  }
+
+  // Masked element-wise equality: __mask_cmp_mask with the _MM_CMPINT_EQ predicate.
+  static _LIBCPP_HIDE_FROM_ABI __mask_t __mask_cmp_eq(__mask_t __mask, __simd_t __lhs, __simd_t __rhs) noexcept {
+    return __mask_cmp_mask<_MM_CMPINT_EQ>(__mask, __lhs, __rhs);
+  }
+# else
+  // Without AVX512VL no masked operations are offered.
+  static constexpr bool __has_maskload  = false;
+  static constexpr bool __has_maskstore = false;
+# endif
+};
+
+# endif // __AVX512F__
+
+} // namespace parallelism_v2
+_LIBCPP_END_NAMESPACE_EXPERIMENTAL
+
+#endif
+
+// NOLINTEND(libcpp-robust-against-adl)
+
+#endif // _LIBCPP_EXPERIMENTAL___SIMD_FEATURE_TRAITS_H
diff --git a/libcxx/include/experimental/__simd/simd.h b/libcxx/include/experimental/__simd/simd.h
index c345811fee7fc7..adc40048954c98 100644
--- a/libcxx/include/experimental/__simd/simd.h
+++ b/libcxx/include/experimental/__simd/simd.h
@@ -15,6 +15,7 @@
#include <__utility/forward.h>
#include <cstddef>
#include <experimental/__config>
+#include <experimental/__simd/avx512.h>
#include <experimental/__simd/declaration.h>
#include <experimental/__simd/reference.h>
#include <experimental/__simd/traits.h>
@@ -44,6 +45,9 @@ class simd {
_LIBCPP_HIDE_FROM_ABI simd() noexcept = default;
+ template <class _Up, class _Flags>
+ _LIBCPP_HIDE_FROM_ABI simd(const _Up* __data, _Flags) noexcept : __s_(_Impl::__load(__data)) {} // Load constructor: fills the storage through _Impl::__load(__data). The flags argument is accepted but not used.
+
// broadcast constructor
template <class _Up, enable_if_t<__can_broadcast_v<value_type, __remove_cvref_t<_Up>>, int> = 0>
_LIBCPP_HIDE_FROM_ABI simd(_Up&& __v) noexcept : __s_(_Impl::__broadcast(static_cast<value_type>(__v))) {}
@@ -64,9 +68,78 @@ class simd {
explicit _LIBCPP_HIDE_FROM_ABI simd(_Generator&& __g) noexcept
: __s_(_Impl::__generate(std::forward<_Generator>(__g))) {}
+ _LIBCPP_HIDE_FROM_ABI simd(__from_storage_t, _Storage __data) noexcept : __s_(__data) {} // Internal: adopts an already-built storage object as-is; the tag argument only selects this overload.
+
// scalar access [simd.subscr]
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }
+
+ // Internal: returns a copy of the underlying storage object. Marked noexcept like simd_mask::__get_data().
+ _LIBCPP_HIDE_FROM_ABI _Storage __get_data() const noexcept { return __s_; }
+
+# ifdef __AVX512F__
+ // Compares two storage objects element-wise with the _MM_CMPINT_* predicate __comparator and returns the raw
+ // __mmaskN result of the intrinsic that matches the element size and element count.
+ // An element size or count without a matching intrinsic is rejected at compile time.
+ template <int __comparator>
+ static _LIBCPP_HIDE_FROM_ABI auto __cmp(_Storage __lhs_wrapped, _Storage __rhs_wrapped) {
+   auto __lhs                     = __lhs_wrapped.__data;
+   auto __rhs                     = __rhs_wrapped.__data;
+   constexpr auto __element_size  = sizeof(_Tp);
+   constexpr auto __element_count = size();
+   if constexpr (__element_size == 1) {
+     if constexpr (__element_count == 16) {
+       return _mm_cmp_epi8_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 32) {
+       return _mm256_cmp_epi8_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 64) {
+       return _mm512_cmp_epi8_mask(__lhs, __rhs, __comparator);
+     } else {
+       static_assert(__element_count == 0, "Unexpected size");
+     }
+   } else if constexpr (__element_size == 2) {
+     if constexpr (__element_count == 8) {
+       return _mm_cmp_epi16_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 16) {
+       return _mm256_cmp_epi16_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 32) {
+       return _mm512_cmp_epi16_mask(__lhs, __rhs, __comparator);
+     } else {
+       static_assert(__element_count == 0, "Unexpected size");
+     }
+   } else if constexpr (__element_size == 4) {
+     if constexpr (__element_count == 4) {
+       return _mm_cmp_epi32_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 8) {
+       return _mm256_cmp_epi32_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 16) {
+       return _mm512_cmp_epi32_mask(__lhs, __rhs, __comparator);
+     } else {
+       static_assert(__element_count == 0, "Unexpected size");
+     }
+   } else if constexpr (__element_size == 8) {
+     if constexpr (__element_count == 2) {
+       return _mm_cmp_epi64_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 4) {
+       return _mm256_cmp_epi64_mask(__lhs, __rhs, __comparator);
+     } else if constexpr (__element_count == 8) {
+       return _mm512_cmp_epi64_mask(__lhs, __rhs, __comparator);
+     } else {
+       static_assert(__element_count == 0, "Unexpected size");
+     }
+   } else {
+     // Without this branch the function would silently deduce a void return type for other element sizes.
+     static_assert(__element_size == 0, "Unexpected size");
+   }
+ }
+# endif
+
+ // Element-wise equality. Under the AVX-512 ABI the comparison is a single __cmp<_MM_CMPINT_EQ> call; for every
+ // other ABI the elements are compared one at a time.
+ friend _LIBCPP_HIDE_FROM_ABI mask_type operator==(const simd& __lhs, const simd& __rhs) noexcept {
+#ifdef __AVX512F__
+   if constexpr (simd_abi::__is_avx512_v<_Abi>) {
+     return {__from_storage, {__cmp<_MM_CMPINT_EQ>(__lhs.__s_, __rhs.__s_)}};
+   } else
+#endif
+   {
+     mask_type __result;
+     // size_t index: size() is unsigned, so an int index would be a signed/unsigned comparison.
+     for (size_t __i = 0; __i != size(); ++__i)
+       __result[__i] = __lhs[__i] == __rhs[__i];
+     return __result;
+   }
+ }
};
template <class _Tp, class _Abi>
diff --git a/libcxx/include/experimental/__simd/simd_mask.h b/libcxx/include/experimental/__simd/simd_mask.h
index db03843b46e3ad..3a3dab7cbb7b5b 100644
--- a/libcxx/include/experimental/__simd/simd_mask.h
+++ b/libcxx/include/experimental/__simd/simd_mask.h
@@ -11,6 +11,7 @@
#define _LIBCPP_EXPERIMENTAL___SIMD_SIMD_MASK_H
#include <__type_traits/is_same.h>
+#include <__utility/unreachable.h>
#include <cstddef>
#include <experimental/__config>
#include <experimental/__simd/declaration.h>
@@ -41,6 +42,8 @@ class simd_mask {
_LIBCPP_HIDE_FROM_ABI simd_mask() noexcept = default;
+ // Internal: adopts an already-built storage object as-is; the tag argument only selects this overload.
+ // Marked noexcept like the equivalent simd constructor.
+ _LIBCPP_HIDE_FROM_ABI simd_mask(__from_storage_t, _Storage __data) noexcept : __s_(__data) {}
+
// broadcast constructor
_LIBCPP_HIDE_FROM_ABI explicit simd_mask(value_type __v) noexcept : __s_(_Impl::__broadcast(__v)) {}
@@ -55,6 +58,8 @@ class simd_mask {
// scalar access [simd.mask.subscr]
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }
+
+ _LIBCPP_HIDE_FROM_ABI _Storage __get_data() const noexcept { return __s_; } // Internal: returns a copy of the underlying storage object.
};
template <class _Tp, class _Abi>
@@ -66,6 +71,27 @@ using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;
template <class _Tp, int _Np>
using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;
+// Returns whether every element of __mask is true, as decided by the ABI's __mask_operations::all_of.
+template <class _Tp, class _Abi>
+_LIBCPP_HIDE_FROM_ABI bool all_of(const simd_mask<_Tp, _Abi>& __mask) noexcept {
+  using _Ops          = __mask_operations<_Tp, _Abi>;
+  const auto __stored = __mask.__get_data();
+  return _Ops::all_of(__stored);
+}
+
+// Returns the lowest index i for which __mask[i] is true.
+// At least one element has to be true: the generic path ends in __libcpp_unreachable() otherwise, and counting the
+// trailing zeros of an empty bit mask is undefined.
+template <class _Tp, class _Abi>
+_LIBCPP_HIDE_FROM_ABI int find_first_set(const simd_mask<_Tp, _Abi>& __mask) noexcept {
+# ifdef __AVX512F__
+  if constexpr (simd_abi::__is_avx512_v<_Abi>) {
+    // Element i is stored in bit i of the mask, so the first set element is the lowest set bit. That is the number
+    // of trailing zeros; counting leading zeros would measure from the wrong end.
+    return __builtin_ctzll(static_cast<unsigned long long>(__mask.__get_data().__mask_));
+  } else
+# endif
+  {
+    const int __count = static_cast<int>(__mask.size());
+    for (int __i = 0; __i != __count; ++__i) {
+      if (__mask[__i])
+        return __i;
+    }
+    std::__libcpp_unreachable();
+  }
+}
+
} // namespace parallelism_v2
_LIBCPP_END_NAMESPACE_EXPERIMENTAL
diff --git a/libcxx/include/experimental/__simd/vec_ext.h b/libcxx/include/experimental/__simd/vec_ext.h
index 07ba032f493b1e..79dbfdf5982461 100644
--- a/libcxx/include/experimental/__simd/vec_ext.h
+++ b/libcxx/include/experimental/__simd/vec_ext.h
@@ -19,6 +19,10 @@
#include <experimental/__simd/traits.h>
#include <experimental/__simd/utility.h>
+#if __has_include(<immintrin.h>)
+# include <immintrin.h>
+#endif
+
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_ENABLE_EXPERIMENTAL)
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL
@@ -73,6 +77,14 @@ struct __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> {
static _LIBCPP_HIDE_FROM_ABI _SimdStorage __generate(_Generator&& __g) noexcept {
return __generate_init(std::forward<_Generator>(__g), std::make_index_sequence<_Np>());
}
+
+ // Builds a storage object whose element i is set from __data[i], for every i in [0, _Np).
+ template <class _Up>
+ static _LIBCPP_HIDE_FROM_ABI _SimdStorage __load(const _Up* __data) noexcept {
+   _SimdStorage __loaded;
+   size_t __pos = 0;
+   while (__pos != _Np) {
+     __loaded.__set(__pos, __data[__pos]);
+     ++__pos;
+   }
+   return __loaded;
+ }
};
template <class _Tp, int _Np>
@@ -87,6 +99,27 @@ struct __mask_operations<_Tp, simd_abi::__vec_ext<_Np>> {
}
return __result;
}
+
+ // Returns whether every element of __mask is true. 32-byte masks (with AVX2) and 16-byte masks (with SSE2) are
+ // checked with one movemask instruction; any other size is checked element by element.
+ static _LIBCPP_HIDE_FROM_ABI bool all_of(_MaskStorage __mask) noexcept {
+   [[maybe_unused]] constexpr auto __bytes = sizeof(_Tp) * _Np;
+# ifdef __AVX2__
+   if constexpr (__bytes == 32) {
+     // One result bit per byte of the vector: all 32 have to be set.
+     const auto __byte_bits = _mm256_movemask_epi8((__m256i)__mask.__data);
+     return __byte_bits == 0xffffffffU;
+   } else
+# endif
+# ifdef __SSE2__
+   if constexpr (__bytes == 16) {
+     // One result bit per byte of the vector: all 16 have to be set.
+     const auto __byte_bits = _mm_movemask_epi8((__m128i)__mask.__data);
+     return __byte_bits == 0xffffU;
+   } else
+# endif
+   {
+     // Advance past the leading true elements; all are true exactly if the end is reached.
+     int __i = 0;
+     while (__i != _Np && __mask.__get(__i))
+       ++__i;
+     return __i == _Np;
+   }
+ }
};
} // namespace parallelism_v2
diff --git a/libcxx/src/memory_resource.cpp b/libcxx/src/memory_resource.cpp
index afd1b892086da8..7aea374a5a8c1d 100644
--- a/libcxx/src/memory_resource.cpp
+++ b/libcxx/src/memory_resource.cpp
@@ -8,6 +8,7 @@
#include <memory>
#include <memory_resource>
+#include <__utility/align_down.h>
#ifndef _LIBCPP_HAS_NO_ATOMIC_HEADER
# include <atomic>
@@ -416,7 +417,7 @@ static void* align_down(size_t align, size_t size, void*& ptr, size_t& space) {
return nullptr;
char* p1 = static_cast<char*>(ptr);
- char* new_ptr = reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(p1 - size) & ~(align - 1));
+ char* new_ptr = static_cast<char*>(std::__align_down(align, p1 - size));
if (new_ptr < (p1 - space))
return nullptr;
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp
index cc588c095ccfb2..e5f481bc6cfae0 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp
@@ -23,71 +23,36 @@
#include "test_macros.h"
#include "test_iterators.h"
-#if TEST_STD_VER > 17
-TEST_CONSTEXPR bool test_constexpr() {
- int ia[] = {1, 3, 6, 7};
- int ib[] = {1, 3};
- int ic[] = {1, 3, 5, 7};
- typedef cpp17_input_iterator<int*> II;
- typedef bidirectional_iterator<int*> BI;
+TEST_CONSTEXPR_CXX20 bool test() {
+ int ia[] = {0, 1, 2, 2, 0, 1, 2, 3};
+ const unsigned sa = sizeof(ia) / sizeof(ia[0]);
+ int ib[] = {0, 1, 2, 3, 0, 1, 2, 3};
+ const unsigned sb = sizeof(ib) / sizeof(ib[0]);
+ ((void)sb); // unused in C++11
- auto p1 = std::mismatch(std::begin(ia), std::end(ia), std::begin(ic));
- if (p1.first != ia+2 || p1.second != ic+2)
- return false;
+ typedef cpp17_input_iterator<const int*> II;
+ typedef random_access_iterator<const int*> RAI;
- auto p2 = std::mismatch(std::begin(ia), std::end(ia), std::begin(ic), std::end(ic));
- if (p2.first != ia+2 || p2.second != ic+2)
- return false;
+ assert(std::mismatch(II(ia), II(ia + sa), II(ib)) == (std::pair<II, II>(II(ia + 3), II(ib + 3))));
- auto p3 = std::mismatch(std::begin(ib), std::end(ib), std::begin(ic));
- if (p3.first != ib+2 || p3.second != ic+2)
- return false;
-
- auto p4 = std::mismatch(std::begin(ib), std::end(ib), std::begin(ic), std::end(ic));
- if (p4.first != ib+2 || p4.second != ic+2)
- return false;
-
- auto p5 = std::mismatch(II(std::begin(ib)), II(std::end(ib)), II(std::begin(ic)));
- if (p5.first != II(ib+2) || p5.second != II(ic+2))
- return false;
- auto p6 = std::mismatch(BI(std::begin(ib)), BI(std::end(ib)), BI(std::begin(ic)), BI(std::end(ic)));
- if (p6.first != BI(ib+2) || p6.second != BI(ic+2))
- return false;
-
- return true;
- }
-#endif
-
-int main(int, char**)
-{
- int ia[] = {0, 1, 2, 2, 0, 1, 2, 3};
- const unsigned sa = sizeof(ia)/sizeof(ia[0]);
- int ib[] = {0, 1, 2, 3, 0, 1, 2, 3};
- const unsigned sb = sizeof(ib)/sizeof(ib[0]); ((void)sb); // unused in C++11
-
- typedef cpp17_input_iterator<const int*> II;
- typedef random_access_iterator<const int*> RAI;
-
- assert(std::mismatch(II(ia), II(ia + sa), II(ib))
- == (std::pair<II, II>(II(ia+3), II(ib+3))));
-
- assert(std::mismatch(RAI(ia), RAI(ia + sa), RAI(ib))
- == (std::pair<RAI, RAI>(RAI(ia+3), RAI(ib+3))));
+ assert(std::mismatch(RAI(ia), RAI(ia + sa), RAI(ib)) == (std::pair<RAI, RAI>(RAI(ia + 3), RAI(ib + 3))));
#if TEST_STD_VER > 11 // We have the four iteration version
- assert(std::mismatch(II(ia), II(ia + sa), II(ib), II(ib+sb))
- == (std::pair<II, II>(II(ia+3), II(ib+3))));
-
- assert(std::mismatch(RAI(ia), RAI(ia + sa), RAI(ib), RAI(ib+sb))
- == (std::pair<RAI, RAI>(RAI(ia+3), RAI(ib+3))));
+ assert(std::mismatch(II(ia), II(ia + sa), II(ib), II(ib + sb)) == (std::pair<II, II>(II(ia + 3), II(ib + 3))));
+ assert(std::mismatch(RAI(ia), RAI(ia + sa), RAI(ib), RAI(ib + sb)) ==
+ (std::pair<RAI, RAI>(RAI(ia + 3), RAI(ib + 3))));
- assert(std::mismatch(II(ia), II(ia + sa), II(ib), II(ib+2))
- == (std::pair<II, II>(II(ia+2), II(ib+2))));
+ assert(std::mismatch(II(ia), II(ia + sa), II(ib), II(ib + 2)) == (std::pair<II, II>(II(ia + 2), II(ib + 2))));
#endif
-#if TEST_STD_VER > 17
- static_assert(test_constexpr());
+ return true;
+}
+
+int main(int, char**) {
+ test();
+#if TEST_STD_VER >= 20
+ static_assert(test());
#endif
return 0;
More information about the libcxx-commits
mailing list