[clang] Update SSE/AVX integer comparison intrinsics to be used in constexpr (PR #155656)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 28 05:23:03 PDT 2025
https://github.com/smoke-y updated https://github.com/llvm/llvm-project/pull/155656
>From 032c6310682a4dc535fad7d94f07dd585bc24df1 Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Wed, 27 Aug 2025 22:08:02 +0530
Subject: [PATCH 1/7] Update SSE/AVX integer comparison intrinsics to be used in
constexpr
---
clang/lib/Headers/avx2intrin.h | 16 ++++++++--------
clang/lib/Headers/emmintrin.h | 18 +++++++++---------
clang/lib/Headers/smmintrin.h | 4 ++--
clang/test/CodeGen/X86/avx2-builtins.c | 8 ++++++++
clang/test/CodeGen/X86/sse2-builtins.c | 9 +++++++++
clang/test/CodeGen/X86/sse41-builtins.c | 1 +
clang/test/CodeGen/X86/sse42-builtins.c | 2 ++
7 files changed, 39 insertions(+), 19 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index ce5b2b7544d8c..05bd15385d149 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -637,7 +637,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
/// \param __b
/// A 256-bit integer vector containing one of the inputs.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qi)__a == (__v32qi)__b);
@@ -663,7 +663,7 @@ _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the inputs.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpeq_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a == (__v16hi)__b);
@@ -689,7 +689,7 @@ _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the inputs.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpeq_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a == (__v8si)__b);
@@ -715,7 +715,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing one of the inputs.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4di)__a == (__v4di)__b);
@@ -741,7 +741,7 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector containing one of the inputs.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
{
/* This function always performs a signed comparison, but __v32qi is a char
@@ -769,7 +769,7 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the inputs.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpgt_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hi)__a > (__v16hi)__b);
@@ -795,7 +795,7 @@ _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the inputs.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpgt_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8si)__a > (__v8si)__b);
@@ -821,7 +821,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing one of the inputs.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4di)__a > (__v4di)__b);
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 8b6b62458dac1..7f69019e01b06 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -3090,7 +3090,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a,
__m128i __b) {
return (__m128i)((__v16qi)__a == (__v16qi)__b);
}
@@ -3109,7 +3109,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi16(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hi)__a == (__v8hi)__b);
}
@@ -3128,7 +3128,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi32(__m128i __a,
__m128i __b) {
return (__m128i)((__v4si)__a == (__v4si)__b);
}
@@ -3148,7 +3148,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi8(__m128i __a,
__m128i __b) {
/* This function always performs a signed comparison, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
@@ -3170,7 +3170,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi16(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hi)__a > (__v8hi)__b);
}
@@ -3190,7 +3190,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi32(__m128i __a,
__m128i __b) {
return (__m128i)((__v4si)__a > (__v4si)__b);
}
@@ -3210,7 +3210,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi8(__m128i __a,
__m128i __b) {
return _mm_cmpgt_epi8(__b, __a);
}
@@ -3230,7 +3230,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi16(__m128i __a,
__m128i __b) {
return _mm_cmpgt_epi16(__b, __a);
}
@@ -3250,7 +3250,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi32(__m128i __a,
__m128i __b) {
return _mm_cmpgt_epi32(__b, __a);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 57d0d329312af..3f44c786fb75f 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1211,7 +1211,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
/// \param __V2
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi64(__m128i __V1,
__m128i __V2) {
return (__m128i)((__v2di)__V1 == (__v2di)__V2);
}
@@ -2338,7 +2338,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
/// \param __V2
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1,
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi64(__m128i __V1,
__m128i __V2) {
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 49e35230ba225..84a4db9695b88 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -287,48 +287,56 @@ __m256i test_mm256_cmpeq_epi8(__m256i a, __m256i b) {
// CHECK: icmp eq <32 x i8>
return _mm256_cmpeq_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_cmpeq_epi8((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}, (__m256i)(__v32qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16,17,36,38,20,42,22,46,24,50,26,54,28,58,30,62,32}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1,-1,0,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1));
__m256i test_mm256_cmpeq_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi16
// CHECK: icmp eq <16 x i16>
return _mm256_cmpeq_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_cmpeq_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-10, -2, +6, -4, +5, -12, +14, -8, +9, -20, +22, -12, +26, -14, +30, -16}), 0, -1, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, 0, -1, 0, -1));
__m256i test_mm256_cmpeq_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi32
// CHECK: icmp eq <8 x i32>
return _mm256_cmpeq_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_cmpeq_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-10, -2, +6, -4, +5, -12, +14, -8}), 0, -1, 0, -1, -1, 0, 0, -1));
__m256i test_mm256_cmpeq_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi64
// CHECK: icmp eq <4 x i64>
return _mm256_cmpeq_epi64(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_cmpeq_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-10, -2, +6, -4}), 0, -1, 0, -1));
__m256i test_mm256_cmpgt_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpgt_epi8
// CHECK: icmp sgt <32 x i8>
return _mm256_cmpgt_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_cmpgt_epi8((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v32qi){10, 2, 6, 5, 30, 7, 8, 1, 20, 3, 12, 8, 25, 10, 9, 2, 10, 2, 6, 5, 30, 7, 8, 1, 20, 3, 12, 8, 25, 10, 9, 2}), 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1));
__m256i test_mm256_cmpgt_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpgt_epi16
// CHECK: icmp sgt <16 x i16>
return _mm256_cmpgt_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_cmpgt_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v16hi){-10, -2, +6, -5, +30, -7, +8, -1, -10, -2, +6, -5, +30, -7, +8, -1}), -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, 0));
__m256i test_mm256_cmpgt_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpgt_epi32
// CHECK: icmp sgt <8 x i32>
return _mm256_cmpgt_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_cmpgt_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-10, -2, +6, -5, +30, -7, +8, -1}), -1, 0, 0, -1, 0, -1, 0, 0));
__m256i test_mm256_cmpgt_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpgt_epi64
// CHECK: icmp sgt <4 x i64>
return _mm256_cmpgt_epi64(a, b);
}
+TEST_CONSTEXPR(match_v4di(_mm256_cmpgt_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-10, -2, +6, -5}), -1, 0, 0, -1));
__m256i test_mm256_cvtepi8_epi16(__m128i a) {
// CHECK-LABEL: test_mm256_cvtepi8_epi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 38d5e877a5036..a578f04c97c71 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -243,18 +243,21 @@ __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
// CHECK: icmp eq <16 x i8>
return _mm_cmpeq_epi8(A, B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1));
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi16
// CHECK: icmp eq <8 x i16>
return _mm_cmpeq_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_cmpeq_epi16((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-10, -2, +6, -4, +5, -12, +14, -8}), 0, -1, 0, -1, -1, 0, 0, -1));
__m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi32
// CHECK: icmp eq <4 x i32>
return _mm_cmpeq_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_cmpeq_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, -2, +6, -4}), 0, -1, 0, -1));
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpeq_pd
@@ -293,18 +296,21 @@ __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
// CHECK: icmp sgt <16 x i8>
return _mm_cmpgt_epi8(A, B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_cmpgt_epi8((__m128i)(__v16qi){15,2,8,4,12,6,20,8,25,10,30,12,35,14,40,16}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), -1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0));
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpgt_epi16
// CHECK: icmp sgt <8 x i16>
return _mm_cmpgt_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_cmpgt_epi16((__m128i)(__v8hi){15,2,8,4,12,6,20,8}, (__m128i)(__v8hi){10,2,6,4,5,12,14,8}), -1,0,-1,0,-1,0,-1,0));
__m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpgt_epi32
// CHECK: icmp sgt <4 x i32>
return _mm_cmpgt_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_cmpgt_epi32((__m128i)(__v4si){15,2,8,4}, (__m128i)(__v4si){10,2,6,4}), -1,0,-1,0));
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_pd
@@ -343,18 +349,21 @@ __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
// CHECK: icmp sgt <16 x i8>
return _mm_cmplt_epi8(A, B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_cmplt_epi8((__m128i)(__v16qi){5,2,3,4,1,6,7,8,9,5,11,12,13,10,15,8}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), -1, 0, -1, 0, -1, -1, -1, 0, 0, -1, -1, 0, -1, -1, -1, -1));
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmplt_epi16
// CHECK: icmp sgt <8 x i16>
return _mm_cmplt_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_cmplt_epi16((__m128i)(__v8hi){5,2,3,4,1,6,7,8}, (__m128i)(__v8hi){10,2,6,4,5,12,14,8}), -1, 0, -1, 0, -1, -1, -1, 0));
__m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmplt_epi32
// CHECK: icmp sgt <4 x i32>
return _mm_cmplt_epi32(A, B);
}
+TEST_CONSTEXPR(match_v4si(_mm_cmplt_epi32((__m128i)(__v4si){5,2,3,4}, (__m128i)(__v4si){10,2,6,4}), -1,0,-1,0));
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmplt_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 500b780d49057..4f9f1530ce427 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -79,6 +79,7 @@ __m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
// CHECK: sext <2 x i1> %{{.*}} to <2 x i64>
return _mm_cmpeq_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_cmpeq_epi64((__m128i)(__v2di){+1, -8}, (__m128i)(__v2di){-10, -8}), 0, -1));
__m128i test_mm_cvtepi8_epi16(__m128i a) {
// CHECK-LABEL: test_mm_cvtepi8_epi16
diff --git a/clang/test/CodeGen/X86/sse42-builtins.c b/clang/test/CodeGen/X86/sse42-builtins.c
index d0c0cce33e1d0..aa598b8f78069 100644
--- a/clang/test/CodeGen/X86/sse42-builtins.c
+++ b/clang/test/CodeGen/X86/sse42-builtins.c
@@ -9,6 +9,7 @@
#include <immintrin.h>
+#include "builtin_test_helpers.h"
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -59,6 +60,7 @@ __m128i test_mm_cmpgt_epi64(__m128i A, __m128i B) {
// CHECK: icmp sgt <2 x i64>
return _mm_cmpgt_epi64(A, B);
}
+TEST_CONSTEXPR(match_v2di(_mm_cmpgt_epi64((__m128i)(__v2di){+1, -8}, (__m128i)(__v2di){-10, -8}), -1, 0));
int test_mm_cmpistra(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpistra
>From 75fc7f029a1d8e1825a1b81365f947fcff36d07b Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 16:41:55 +0530
Subject: [PATCH 2/7] qi -> qs
---
clang/test/CodeGen/X86/avx2-builtins.c | 5 ++++-
clang/test/CodeGen/X86/sse2-builtins.c | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 84a4db9695b88..7e25da35020c5 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -287,7 +287,10 @@ __m256i test_mm256_cmpeq_epi8(__m256i a, __m256i b) {
// CHECK: icmp eq <32 x i8>
return _mm256_cmpeq_epi8(a, b);
}
-TEST_CONSTEXPR(match_v32qi(_mm256_cmpeq_epi8((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}, (__m256i)(__v32qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16,17,36,38,20,42,22,46,24,50,26,54,28,58,30,62,32}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1,-1,0,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1));
+TEST_CONSTEXPR(match_v32qi(_mm256_cmpeq_epi8(
+  (__m256i)(__v32qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16,17,-18,19,-20,-21,22,-23,24,-25,26,-27,28,-29,30,-31,32},
+  (__m256i)(__v32qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16,17,-36,38,-20,-42,22,-46,24,-50,26,-54,28,-58,30,-62,32}),
+  0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1,-1,0,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1));
__m256i test_mm256_cmpeq_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpeq_epi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index a578f04c97c71..17030d0b7920e 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -243,7 +243,7 @@ __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
// CHECK: icmp eq <16 x i8>
return _mm_cmpeq_epi8(A, B);
}
-TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1));
+TEST_CONSTEXPR(match_v16qi(_mm_cmpeq_epi8((__m128i)(__v16qs){1,-2,3,-4,-5,6,-7,8,-9,10,-11,12,-13,14,-15,16}, (__m128i)(__v16qs){10,-2,6,-4,-5,12,-14,8,-9,20,-22,12,-26,14,-30,16}), 0,-1,0,-1,-1,0,0,-1,-1,0,0,-1,0,-1,0,-1));
__m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi16
>From 5b8a7f11ee2f454ca52c47ad3c19267ff58748ee Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 16:53:55 +0530
Subject: [PATCH 3/7] clang-format on avx2, emmintrin, smmintrin
---
clang/lib/Headers/avx2intrin.h | 852 +++++++++++++++------------------
clang/lib/Headers/emmintrin.h | 36 +-
clang/lib/Headers/smmintrin.h | 8 +-
3 files changed, 396 insertions(+), 500 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 05bd15385d149..7d617e519d7b9 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -97,8 +97,8 @@
/// An unsigned immediate value specifying the starting positions of the
/// bytes to operate on.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-#define _mm256_mpsadbw_epu8(X, Y, M) \
- ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
+#define _mm256_mpsadbw_epu8(X, Y, M) \
+ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
(__v32qi)(__m256i)(Y), (int)(M)))
/// Computes the absolute value of each signed byte in the 256-bit integer
@@ -112,10 +112,8 @@
/// \param __a
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi8(__m256i __a)
-{
- return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) {
+ return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
}
/// Computes the absolute value of each signed 16-bit element in the 256-bit
@@ -129,10 +127,8 @@ _mm256_abs_epi8(__m256i __a)
/// \param __a
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi16(__m256i __a)
-{
- return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) {
+ return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
}
/// Computes the absolute value of each signed 32-bit element in the 256-bit
@@ -146,10 +142,8 @@ _mm256_abs_epi16(__m256i __a)
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_abs_epi32(__m256i __a)
-{
- return (__m256i)__builtin_elementwise_abs((__v8si)__a);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) {
+ return (__m256i)__builtin_elementwise_abs((__v8si)__a);
}
/// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit
@@ -178,8 +172,7 @@ _mm256_abs_epi32(__m256i __a)
/// result[255:192].
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi16(__m256i __a, __m256i __b)
-{
+_mm256_packs_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
}
@@ -210,8 +203,7 @@ _mm256_packs_epi16(__m256i __a, __m256i __b)
/// result[255:192].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packs_epi32(__m256i __a, __m256i __b)
-{
+_mm256_packs_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
}
@@ -241,8 +233,7 @@ _mm256_packs_epi32(__m256i __a, __m256i __b)
/// result[255:192].
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi16(__m256i __a, __m256i __b)
-{
+_mm256_packus_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
}
@@ -273,9 +264,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b)
/// result[255:192].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_packus_epi32(__m256i __V1, __m256i __V2)
-{
- return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
+_mm256_packus_epi32(__m256i __V1, __m256i __V2) {
+ return (__m256i)__builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
}
/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
@@ -291,9 +281,8 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
/// \param __b
/// A 256-bit integer vector containing one of the source operands.
/// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v32qu)__a + (__v32qu)__b);
}
@@ -310,9 +299,8 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v16hu)__a + (__v16hu)__b);
}
@@ -329,9 +317,8 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v8su)__a + (__v8su)__b);
}
@@ -348,9 +335,8 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_add_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v4du)__a + (__v4du)__b);
}
@@ -448,8 +434,8 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) {
/// \param n
/// An immediate value specifying the number of bytes to shift.
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_alignr_epi8(a, b, n) \
- ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
+#define _mm256_alignr_epi8(a, b, n) \
+ ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
(__v32qi)(__m256i)(b), (n)))
/// Computes the bitwise AND of the 256-bit integer vectors in \a __a and
@@ -465,8 +451,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) {
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_and_si256(__m256i __a, __m256i __b)
-{
+_mm256_and_si256(__m256i __a, __m256i __b) {
return (__m256i)((__v4du)__a & (__v4du)__b);
}
@@ -483,8 +468,7 @@ _mm256_and_si256(__m256i __a, __m256i __b)
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_andnot_si256(__m256i __a, __m256i __b)
-{
+_mm256_andnot_si256(__m256i __a, __m256i __b) {
return (__m256i)(~(__v4du)__a & (__v4du)__b);
}
@@ -508,9 +492,8 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
}
@@ -534,9 +517,8 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
}
@@ -570,10 +552,9 @@ _mm256_avg_epu16(__m256i __a, __m256i __b)
/// \a __V2.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
-{
+_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) {
return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
- (__v32qi)__M);
+ (__v32qi)__M);
}
/// Merges 16-bit integer values from either of the two 256-bit vectors
@@ -613,8 +594,8 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
/// \a M[0] determines the source for elements 0 and 8, \a M[1] for
/// elements 1 and 9, and so forth.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-#define _mm256_blend_epi16(V1, V2, M) \
- ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
+#define _mm256_blend_epi16(V1, V2, M) \
+ ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
(__v16hi)(__m256i)(V2), (int)(M)))
/// Compares corresponding bytes in the 256-bit integer vectors in \a __a and
@@ -638,8 +619,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
/// A 256-bit integer vector containing one of the inputs.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
-{
+_mm256_cmpeq_epi8(__m256i __a, __m256i __b) {
return (__m256i)((__v32qi)__a == (__v32qi)__b);
}
@@ -664,8 +644,7 @@ _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the inputs.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpeq_epi16(__m256i __a, __m256i __b)
-{
+_mm256_cmpeq_epi16(__m256i __a, __m256i __b) {
return (__m256i)((__v16hi)__a == (__v16hi)__b);
}
@@ -690,8 +669,7 @@ _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing one of the inputs.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpeq_epi32(__m256i __a, __m256i __b)
-{
+_mm256_cmpeq_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8si)__a == (__v8si)__b);
}
@@ -716,8 +694,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x i64] containing one of the inputs.
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
-{
+_mm256_cmpeq_epi64(__m256i __a, __m256i __b) {
return (__m256i)((__v4di)__a == (__v4di)__b);
}
@@ -742,8 +719,7 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
/// A 256-bit integer vector containing one of the inputs.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
-{
+_mm256_cmpgt_epi8(__m256i __a, __m256i __b) {
/* This function always performs a signed comparison, but __v32qi is a char
which may be signed or unsigned, so use __v32qs. */
return (__m256i)((__v32qs)__a > (__v32qs)__b);
@@ -770,8 +746,7 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the inputs.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpgt_epi16(__m256i __a, __m256i __b)
-{
+_mm256_cmpgt_epi16(__m256i __a, __m256i __b) {
return (__m256i)((__v16hi)__a > (__v16hi)__b);
}
@@ -796,8 +771,7 @@ _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing one of the inputs.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpgt_epi32(__m256i __a, __m256i __b)
-{
+_mm256_cmpgt_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8si)__a > (__v8si)__b);
}
@@ -822,8 +796,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x i64] containing one of the inputs.
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
-{
+_mm256_cmpgt_epi64(__m256i __a, __m256i __b) {
return (__m256i)((__v4di)__a > (__v4di)__b);
}
@@ -857,10 +830,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi16(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
@@ -889,10 +861,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadd_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi32(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
@@ -925,9 +896,8 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hadds_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
+_mm256_hadds_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -960,10 +930,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi16(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
}
/// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
@@ -992,10 +961,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsub_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi32(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
}
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -1029,9 +997,8 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_hsubs_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
+_mm256_hsubs_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
}
/// Multiplies each unsigned byte from the 256-bit integer vector in \a __a
@@ -1059,9 +1026,8 @@ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maddubs_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
+_mm256_maddubs_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
}
/// Multiplies corresponding 16-bit elements of two 256-bit vectors of
@@ -1090,9 +1056,8 @@ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_madd_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
}
@@ -1109,9 +1074,8 @@ _mm256_madd_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b);
}
@@ -1128,9 +1092,8 @@ _mm256_max_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b);
}
@@ -1147,9 +1110,8 @@ _mm256_max_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b);
}
@@ -1166,9 +1128,8 @@ _mm256_max_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b);
}
@@ -1185,9 +1146,8 @@ _mm256_max_epu8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b);
}
@@ -1204,9 +1164,8 @@ _mm256_max_epu16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_max_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b);
}
@@ -1223,9 +1182,8 @@ _mm256_max_epu32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b);
}
@@ -1242,9 +1200,8 @@ _mm256_min_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b);
}
@@ -1261,9 +1218,8 @@ _mm256_min_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b);
}
@@ -1280,9 +1236,8 @@ _mm256_min_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b);
}
@@ -1299,9 +1254,8 @@ _mm256_min_epu8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b);
}
@@ -1318,9 +1272,8 @@ _mm256_min_epu16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_min_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a,
+ __m256i __b) {
return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
}
@@ -1341,9 +1294,7 @@ _mm256_min_epu32(__m256i __a, __m256i __b)
/// \param __a
/// A 256-bit integer vector containing the source bytes.
/// \returns The 32-bit integer mask.
-static __inline__ int __DEFAULT_FN_ATTRS256
-_mm256_movemask_epi8(__m256i __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) {
return __builtin_ia32_pmovmskb256((__v32qi)__a);
}
@@ -1371,7 +1322,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi8_epi16(__m128i __V) {
/* This function always performs a signed extension, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
- return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
+ return (__m256i) __builtin_convertvector((__v16qs)__V, __v16hi);
}
/// Sign-extends bytes from the lower half of the 128-bit integer vector in
@@ -1398,7 +1349,10 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi8_epi32(__m128i __V) {
/* This function always performs a signed extension, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6,
+ 7),
+ __v8si);
}
/// Sign-extends the first four bytes from the 128-bit integer vector in
@@ -1424,7 +1378,8 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi8_epi64(__m128i __V) {
/* This function always performs a signed extension, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
}
/// Sign-extends 16-bit elements from the 128-bit vector of [8 x i16] in
@@ -1449,7 +1404,7 @@ _mm256_cvtepi8_epi64(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi16_epi32(__m128i __V) {
- return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
+ return (__m256i) __builtin_convertvector((__v8hi)__V, __v8si);
}
/// Sign-extends 16-bit elements from the lower half of the 128-bit vector of
@@ -1473,7 +1428,8 @@ _mm256_cvtepi16_epi32(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi16_epi64(__m128i __V) {
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
}
/// Sign-extends 32-bit elements from the 128-bit vector of [4 x i32] in
@@ -1497,7 +1453,7 @@ _mm256_cvtepi16_epi64(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepi32_epi64(__m128i __V) {
- return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
+ return (__m256i) __builtin_convertvector((__v4si)__V, __v4di);
}
/// Zero-extends bytes from the 128-bit integer vector in \a __V and returns
@@ -1522,7 +1478,7 @@ _mm256_cvtepi32_epi64(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu8_epi16(__m128i __V) {
- return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
+ return (__m256i) __builtin_convertvector((__v16qu)__V, __v16hi);
}
/// Zero-extends bytes from the lower half of the 128-bit integer vector in
@@ -1547,7 +1503,10 @@ _mm256_cvtepu8_epi16(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu8_epi32(__m128i __V) {
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6,
+ 7),
+ __v8si);
}
/// Zero-extends the first four bytes from the 128-bit integer vector in
@@ -1571,7 +1530,8 @@ _mm256_cvtepu8_epi32(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu8_epi64(__m128i __V) {
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
}
/// Zero-extends 16-bit elements from the 128-bit vector of [8 x i16] in
@@ -1596,7 +1556,7 @@ _mm256_cvtepu8_epi64(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu16_epi32(__m128i __V) {
- return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
+ return (__m256i) __builtin_convertvector((__v8hu)__V, __v8si);
}
/// Zero-extends 16-bit elements from the lower half of the 128-bit vector of
@@ -1620,7 +1580,8 @@ _mm256_cvtepu16_epi32(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu16_epi64(__m128i __V) {
- return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
+ return (__m256i) __builtin_convertvector(
+ __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
}
/// Zero-extends 32-bit elements from the 128-bit vector of [4 x i32] in
@@ -1644,7 +1605,7 @@ _mm256_cvtepu16_epi64(__m128i __V) {
/// values.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_cvtepu32_epi64(__m128i __V) {
- return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
+ return (__m256i) __builtin_convertvector((__v4su)__V, __v4di);
}
/// Multiplies signed 32-bit integers from even-numbered elements of two
@@ -1694,8 +1655,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) {
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the rounded products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
-{
+_mm256_mulhrs_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
}
@@ -1713,8 +1673,7 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mulhi_epu16(__m256i __a, __m256i __b)
-{
+_mm256_mulhi_epu16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b);
}
@@ -1732,8 +1691,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mulhi_epi16(__m256i __a, __m256i __b)
-{
+_mm256_mulhi_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
}
@@ -1751,8 +1709,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mullo_epi16(__m256i __a, __m256i __b)
-{
+_mm256_mullo_epi16(__m256i __a, __m256i __b) {
return (__m256i)((__v16hu)__a * (__v16hu)__b);
}
@@ -1769,9 +1726,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi32 (__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mullo_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8su)__a * (__v8su)__b);
}
@@ -1813,8 +1769,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b) {
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_or_si256(__m256i __a, __m256i __b)
-{
+_mm256_or_si256(__m256i __a, __m256i __b) {
return (__m256i)((__v4du)__a | (__v4du)__b);
}
@@ -1857,9 +1812,8 @@ _mm256_or_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sad_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sad_epu8(__m256i __a,
+ __m256i __b) {
return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
}
@@ -1897,8 +1851,7 @@ _mm256_sad_epu8(__m256i __a, __m256i __b)
/// to copy to the result byte.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_shuffle_epi8(__m256i __a, __m256i __b)
-{
+_mm256_shuffle_epi8(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
}
@@ -1932,7 +1885,7 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
/// result, \a imm[3:2] specifies the index for elements 1 and 5, and so
/// forth.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-#define _mm256_shuffle_epi32(a, imm) \
+#define _mm256_shuffle_epi32(a, imm) \
((__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm)))
/// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] in \a a
@@ -1968,7 +1921,7 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
/// result, \a imm[3:2] specifies the index for elements 5 and 9, and so
/// forth. Indexes are offset by 4 (so 0 means index 4, and so forth).
/// \returns A 256-bit vector of [16 x i16] containing the result.
-#define _mm256_shufflehi_epi16(a, imm) \
+#define _mm256_shufflehi_epi16(a, imm) \
((__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)))
/// Shuffles 16-bit integers from the 256-bit vector of [16 x i16] \a a
@@ -2005,7 +1958,7 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
/// result, \a imm[3:2] specifies the index for elements 1 and 9, and so
/// forth.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-#define _mm256_shufflelo_epi16(a, imm) \
+#define _mm256_shufflelo_epi16(a, imm) \
((__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)))
/// Sets each byte of the result to the corresponding byte of the 256-bit
@@ -2023,10 +1976,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector].
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi8(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -2044,10 +1996,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -2065,10 +2016,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a,
+ __m256i __b) {
+ return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
}
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by
@@ -2088,8 +2038,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
/// \param imm
/// An unsigned immediate value specifying the shift count (in bytes).
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_slli_si256(a, imm) \
- ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
+#define _mm256_slli_si256(a, imm) \
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), \
+ (int)(imm)))
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by
/// \a imm bytes, shifting in zero bytes, and returns the result. If \a imm
@@ -2108,8 +2059,9 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
/// \param imm
/// An unsigned immediate value specifying the shift count (in bytes).
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_bslli_epi128(a, imm) \
- ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
+#define _mm256_bslli_epi128(a, imm) \
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), \
+ (int)(imm)))
/// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
/// left by \a __count bits, shifting in zero bits, and returns the result.
@@ -2124,9 +2076,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
}
@@ -2146,8 +2097,7 @@ _mm256_slli_epi16(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi16(__m256i __a, __m128i __count)
-{
+_mm256_sll_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2164,9 +2114,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
}
@@ -2186,8 +2135,7 @@ _mm256_slli_epi32(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi32(__m256i __a, __m128i __count)
-{
+_mm256_sll_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
}
@@ -2204,9 +2152,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_slli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a,
+ int __count) {
return __builtin_ia32_psllqi256((__v4di)__a, __count);
}
@@ -2226,8 +2173,7 @@ _mm256_slli_epi64(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sll_epi64(__m256i __a, __m128i __count)
-{
+_mm256_sll_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psllq256((__v4di)__a, __count);
}
@@ -2245,9 +2191,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
}
@@ -2268,8 +2213,7 @@ _mm256_srai_epi16(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi16(__m256i __a, __m128i __count)
-{
+_mm256_sra_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2287,9 +2231,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srai_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
}
@@ -2310,8 +2253,7 @@ _mm256_srai_epi32(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sra_epi32(__m256i __a, __m128i __count)
-{
+_mm256_sra_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
}
@@ -2332,7 +2274,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
/// \param imm
/// An unsigned immediate value specifying the shift count (in bytes).
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_srli_si256(a, imm) \
+#define _mm256_srli_si256(a, imm) \
((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
/// Shifts each 128-bit half of the 256-bit integer vector in \a a right by
@@ -2352,7 +2294,7 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
/// \param imm
/// An unsigned immediate value specifying the shift count (in bytes).
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_bsrli_epi128(a, imm) \
+#define _mm256_bsrli_epi128(a, imm) \
((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
/// Shifts each 16-bit element of the 256-bit vector of [16 x i16] in \a __a
@@ -2368,9 +2310,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi16(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
}
@@ -2390,8 +2331,7 @@ _mm256_srli_epi16(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi16(__m256i __a, __m128i __count)
-{
+_mm256_srl_epi16(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
}
@@ -2408,9 +2348,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi32(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a,
+ int __count) {
return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
}
@@ -2430,8 +2369,7 @@ _mm256_srli_epi32(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi32(__m256i __a, __m128i __count)
-{
+_mm256_srl_epi32(__m256i __a, __m128i __count) {
return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
}
@@ -2448,9 +2386,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count)
/// \param __count
/// An unsigned integer value specifying the shift count (in bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srli_epi64(__m256i __a, int __count)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a,
+ int __count) {
return __builtin_ia32_psrlqi256((__v4di)__a, __count);
}
@@ -2470,8 +2407,7 @@ _mm256_srli_epi64(__m256i __a, int __count)
/// shift count (in bits). The upper element is ignored.
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_srl_epi64(__m256i __a, __m128i __count)
-{
+_mm256_srl_epi64(__m256i __a, __m128i __count) {
return __builtin_ia32_psrlq256((__v4di)__a, __count);
}
@@ -2496,9 +2432,8 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
/// \param __b
/// A 256-bit integer vector containing the subtrahends.
/// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v32qu)__a - (__v32qu)__b);
}
@@ -2523,9 +2458,8 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing the subtrahends.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v16hu)__a - (__v16hu)__b);
}
@@ -2549,9 +2483,8 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing the subtrahends.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v8su)__a - (__v8su)__b);
}
@@ -2575,9 +2508,8 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing the subtrahends.
/// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sub_epi64(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a,
+ __m256i __b) {
return (__m256i)((__v4du)__a - (__v4du)__b);
}
@@ -2712,7 +2644,11 @@ _mm256_subs_epu16(__m256i __a, __m256i __b) {
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpackhi_epi8(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
+ return (__m256i)__builtin_shufflevector(
+ (__v32qi)__a, (__v32qi)__b, 8, 32 + 8, 9, 32 + 9, 10, 32 + 10, 11,
+ 32 + 11, 12, 32 + 12, 13, 32 + 13, 14, 32 + 14, 15, 32 + 15, 24, 32 + 24,
+ 25, 32 + 25, 26, 32 + 26, 27, 32 + 27, 28, 32 + 28, 29, 32 + 29, 30,
+ 32 + 30, 31, 32 + 31);
}
/// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
@@ -2746,7 +2682,9 @@ _mm256_unpackhi_epi8(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpackhi_epi16(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
+ return (__m256i)__builtin_shufflevector(
+ (__v16hi)__a, (__v16hi)__b, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7,
+ 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
}
/// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
@@ -2779,7 +2717,8 @@ _mm256_unpackhi_epi16(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpackhi_epi32(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8 + 2, 3,
+ 8 + 3, 6, 8 + 6, 7, 8 + 7);
}
/// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
@@ -2808,7 +2747,8 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpackhi_epi64(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);
+ return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4 + 1, 3,
+ 4 + 3);
}
/// Unpacks and interleaves 8-bit integers from parts of the 256-bit integer
@@ -2841,7 +2781,10 @@ _mm256_unpackhi_epi64(__m256i __a, __m256i __b) {
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpacklo_epi8(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
+ return (__m256i)__builtin_shufflevector(
+ (__v32qi)__a, (__v32qi)__b, 0, 32 + 0, 1, 32 + 1, 2, 32 + 2, 3, 32 + 3, 4,
+ 32 + 4, 5, 32 + 5, 6, 32 + 6, 7, 32 + 7, 16, 32 + 16, 17, 32 + 17, 18,
+ 32 + 18, 19, 32 + 19, 20, 32 + 20, 21, 32 + 21, 22, 32 + 22, 23, 32 + 23);
}
/// Unpacks and interleaves 16-bit integers from parts of the 256-bit vectors
@@ -2875,7 +2818,9 @@ _mm256_unpacklo_epi8(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpacklo_epi16(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
+ return (__m256i)__builtin_shufflevector(
+ (__v16hi)__a, (__v16hi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 8,
+ 16 + 8, 9, 16 + 9, 10, 16 + 10, 11, 16 + 11);
}
/// Unpacks and interleaves 32-bit integers from parts of the 256-bit vectors
@@ -2908,7 +2853,8 @@ _mm256_unpacklo_epi16(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpacklo_epi32(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
+ return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8 + 0, 1,
+ 8 + 1, 4, 8 + 4, 5, 8 + 5);
}
/// Unpacks and interleaves 64-bit integers from parts of the 256-bit vectors
@@ -2937,7 +2883,8 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_unpacklo_epi64(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);
+ return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4 + 0, 2,
+ 4 + 2);
}
/// Computes the bitwise XOR of the 256-bit integer vectors in \a __a and
@@ -2953,8 +2900,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b) {
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_xor_si256(__m256i __a, __m256i __b)
-{
+_mm256_xor_si256(__m256i __a, __m256i __b) {
return (__m256i)((__v4du)__a ^ (__v4du)__b);
}
@@ -2970,8 +2916,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
/// A pointer to the 32-byte aligned memory containing the vector to load.
/// \returns A 256-bit integer vector loaded from memory.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_stream_load_si256(const void *__V)
-{
+_mm256_stream_load_si256(const void *__V) {
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
}
@@ -3021,7 +2966,8 @@ _mm_broadcastsd_pd(__m128d __a) {
/// \returns A 256-bit vector of [8 x float] containing the result.
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_broadcastss_ps(__m128 __X) {
- return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0,
+ 0, 0, 0, 0);
}
/// Broadcasts the 64-bit floating-point value from the low element of the
@@ -3090,8 +3036,8 @@ _mm256_broadcastsi128_si256(__m128i __X) {
/// corresponds to the index of a copied value. When a mask bit is 0, the
/// element is copied from \a V1; otherwise, it is copied from \a V2.
/// \returns A 128-bit vector of [4 x i32] containing the result.
-#define _mm_blend_epi32(V1, V2, M) \
- ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
+#define _mm_blend_epi32(V1, V2, M) \
+ ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
(__v4si)(__m128i)(V2), (int)(M)))
/// Merges 32-bit integer elements from either of the two 256-bit vectors of
@@ -3127,8 +3073,8 @@ _mm256_broadcastsi128_si256(__m128i __X) {
/// corresponds to the index of a copied value. When a mask bit is 0, the
/// element is copied from \a V1; otherwise, it is copied from \a V2.
/// \returns A 256-bit vector of [8 x i32] containing the result.
-#define _mm256_blend_epi32(V1, V2, M) \
- ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
+#define _mm256_blend_epi32(V1, V2, M) \
+ ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), (int)(M)))
/// Broadcasts the low byte from the 128-bit integer vector in \a __X to all
@@ -3143,7 +3089,9 @@ _mm256_broadcastsi128_si256(__m128i __X) {
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_broadcastb_epi8(__m128i __X) {
- return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m256i)__builtin_shufflevector(
+ (__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
/// Broadcasts the low element from the 128-bit vector of [8 x i16] in \a __X
@@ -3158,7 +3106,8 @@ _mm256_broadcastb_epi8(__m128i __X) {
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_broadcastw_epi16(__m128i __X) {
- return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
/// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
@@ -3173,7 +3122,8 @@ _mm256_broadcastw_epi16(__m128i __X) {
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_broadcastd_epi32(__m128i __X) {
- return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0,
+ 0, 0, 0, 0);
}
/// Broadcasts the low element from the 128-bit vector of [2 x i64] in \a __X
@@ -3203,7 +3153,9 @@ _mm256_broadcastq_epi64(__m128i __X) {
/// \returns A 128-bit integer vector containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_broadcastb_epi8(__m128i __X) {
- return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0);
}
/// Broadcasts the low element from the 128-bit vector of [8 x i16] in
@@ -3218,7 +3170,8 @@ _mm_broadcastb_epi8(__m128i __X) {
/// \returns A 128-bit vector of [8 x i16] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_broadcastw_epi16(__m128i __X) {
- return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);
+ return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0,
+ 0, 0, 0, 0);
}
/// Broadcasts the low element from the 128-bit vector of [4 x i32] in \a __X
@@ -3274,8 +3227,7 @@ _mm_broadcastq_epi64(__m128i __X) {
/// \a __a.
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
-{
+_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
}
@@ -3306,7 +3258,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// \a M[1:0] specifies the index in \a a for element 0 of the result,
/// \a M[3:2] specifies the index for element 1, and so forth.
/// \returns A 256-bit vector of [4 x double] containing the result.
-#define _mm256_permute4x64_pd(V, M) \
+#define _mm256_permute4x64_pd(V, M) \
((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
/// Sets the result's 256-bit vector of [8 x float] to copies of elements of
@@ -3332,8 +3284,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
/// \a __a.
/// \returns A 256-bit vector of [8 x float] containing the result.
static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
-{
+_mm256_permutevar8x32_ps(__m256 __a, __m256i __b) {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
}
@@ -3364,7 +3315,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
/// \a M[1:0] specifies the index in \a a for element 0 of the result,
/// \a M[3:2] specifies the index for element 1, and so forth.
/// \returns A 256-bit vector of [4 x i64] containing the result.
-#define _mm256_permute4x64_epi64(V, M) \
+#define _mm256_permute4x64_epi64(V, M) \
((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
/// Sets each half of the 256-bit result either to zero or to one of the
@@ -3410,7 +3361,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
/// 2: the lower half of \a V2 \n
/// 3: the upper half of \a V2
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_permute2x128_si256(V1, V2, M) \
+#define _mm256_permute2x128_si256(V1, V2, M) \
((__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M)))
/// Extracts half of the 256-bit vector \a V to the 128-bit result. If bit 0
@@ -3430,7 +3381,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
/// \param M
/// An immediate value specifying which half of \a V to extract.
/// \returns A 128-bit integer vector containing the result.
-#define _mm256_extracti128_si256(V, M) \
+#define _mm256_extracti128_si256(V, M) \
((__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M)))
/// Copies the 256-bit vector \a V1 to the result, then overwrites half of the
@@ -3453,8 +3404,8 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
/// \param M
/// An immediate value specifying where to put \a V2 in the result.
/// \returns A 256-bit integer vector containing the result.
-#define _mm256_inserti128_si256(V1, V2, M) \
- ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
+#define _mm256_inserti128_si256(V1, V2, M) \
+ ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
(__v2di)(__m128i)(V2), (int)(M)))
/// Conditionally loads eight 32-bit integer elements from memory \a __X, if
@@ -3484,8 +3435,7 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
/// \returns A 256-bit vector of [8 x i32] containing the loaded or zeroed
/// elements.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskload_epi32(int const *__X, __m256i __M)
-{
+_mm256_maskload_epi32(int const *__X, __m256i __M) {
return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
}
@@ -3516,8 +3466,7 @@ _mm256_maskload_epi32(int const *__X, __m256i __M)
/// \returns A 256-bit vector of [4 x i64] containing the loaded or zeroed
/// elements.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskload_epi64(long long const *__X, __m256i __M)
-{
+_mm256_maskload_epi64(long long const *__X, __m256i __M) {
return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);
}
@@ -3548,8 +3497,7 @@ _mm256_maskload_epi64(long long const *__X, __m256i __M)
/// \returns A 128-bit vector of [4 x i32] containing the loaded or zeroed
/// elements.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskload_epi32(int const *__X, __m128i __M)
-{
+_mm_maskload_epi32(int const *__X, __m128i __M) {
return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
}
@@ -3580,8 +3528,7 @@ _mm_maskload_epi32(int const *__X, __m128i __M)
/// \returns A 128-bit vector of [2 x i64] containing the loaded or zeroed
/// elements.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskload_epi64(long long const *__X, __m128i __M)
-{
+_mm_maskload_epi64(long long const *__X, __m128i __M) {
return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
}
@@ -3610,8 +3557,7 @@ _mm_maskload_epi64(long long const *__X, __m128i __M)
/// \param __Y
/// A 256-bit vector of [8 x i32] containing the values to store.
static __inline__ void __DEFAULT_FN_ATTRS256
-_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
-{
+_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) {
__builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
}
@@ -3640,8 +3586,7 @@ _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
/// \param __Y
/// A 256-bit vector of [4 x i64] containing the values to store.
static __inline__ void __DEFAULT_FN_ATTRS256
-_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
-{
+_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) {
__builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
}
@@ -3669,9 +3614,9 @@ _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
/// A 128-bit vector of [4 x i32] containing the mask bits.
/// \param __Y
/// A 128-bit vector of [4 x i32] containing the values to store.
-static __inline__ void __DEFAULT_FN_ATTRS128
-_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
-{
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X,
+ __m128i __M,
+ __m128i __Y) {
__builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
}
@@ -3699,10 +3644,10 @@ _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
/// A 128-bit vector of [2 x i64] containing the mask bits.
/// \param __Y
/// A 128-bit vector of [2 x i64] containing the values to store.
-static __inline__ void __DEFAULT_FN_ATTRS128
-_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
-{
- __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X,
+ __m128i __M,
+ __m128i __Y) {
+ __builtin_ia32_maskstoreq((__v2di *)__X, (__v2di)__M, (__v2di)__Y);
}
/// Shifts each 32-bit element of the 256-bit vector of [8 x i32] in \a __X
@@ -3722,8 +3667,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sllv_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_sllv_epi32(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
}
@@ -3744,8 +3688,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y)
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_sllv_epi32(__m128i __X, __m128i __Y)
-{
+_mm_sllv_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
}
@@ -3766,8 +3709,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
/// bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sllv_epi64(__m256i __X, __m256i __Y)
-{
+_mm256_sllv_epi64(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
}
@@ -3788,8 +3730,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y)
/// bits).
/// \returns A 128-bit vector of [2 x i64] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_sllv_epi64(__m128i __X, __m128i __Y)
-{
+_mm_sllv_epi64(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
}
@@ -3811,8 +3752,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y)
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srav_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_srav_epi32(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
}
@@ -3834,8 +3774,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y)
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srav_epi32(__m128i __X, __m128i __Y)
-{
+_mm_srav_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
}
@@ -3856,8 +3795,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y)
/// bits).
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srlv_epi32(__m256i __X, __m256i __Y)
-{
+_mm256_srlv_epi32(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
}
@@ -3878,8 +3816,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y)
/// bits).
/// \returns A 128-bit vector of [4 x i32] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srlv_epi32(__m128i __X, __m128i __Y)
-{
+_mm_srlv_epi32(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
}
@@ -3900,8 +3837,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
/// bits).
/// \returns A 256-bit vector of [4 x i64] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_srlv_epi64(__m256i __X, __m256i __Y)
-{
+_mm256_srlv_epi64(__m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
}
@@ -3922,8 +3858,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y)
/// bits).
/// \returns A 128-bit vector of [2 x i64] containing the result.
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_srlv_epi64(__m128i __X, __m128i __Y)
-{
+_mm_srlv_epi64(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
}
@@ -3970,11 +3905,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x double] containing the gathered values.
-#define _mm_mask_i32gather_pd(a, m, i, mask, s) \
- ((__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s)))
+#define _mm_mask_i32gather_pd(a, m, i, mask, s) \
+ ((__m128d)__builtin_ia32_gatherd_pd( \
+ (__v2df)(__m128i)(a), (double const *)(m), (__v4si)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
/// Conditionally gathers four 64-bit floating-point values, either from the
/// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
@@ -4018,11 +3952,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x double] containing the gathered values.
-#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \
- ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)(__m256d)(mask), (s)))
+#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \
+ ((__m256d)__builtin_ia32_gatherd_pd256( \
+ (__v4df)(__m256d)(a), (double const *)(m), (__v4si)(__m128i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
/// Conditionally gathers two 64-bit floating-point values, either from the
/// 128-bit vector of [2 x double] in \a a, or from memory \a m using scaled
@@ -4066,11 +3999,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x double] containing the gathered values.
-#define _mm_mask_i64gather_pd(a, m, i, mask, s) \
- ((__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s)))
+#define _mm_mask_i64gather_pd(a, m, i, mask, s) \
+ ((__m128d)__builtin_ia32_gatherq_pd( \
+ (__v2df)(__m128d)(a), (double const *)(m), (__v2di)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
/// Conditionally gathers four 64-bit floating-point values, either from the
/// 256-bit vector of [4 x double] in \a a, or from memory \a m using scaled
@@ -4114,11 +4046,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x double] containing the gathered values.
-#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \
- ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)(__m256d)(mask), (s)))
+#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \
+ ((__m256d)__builtin_ia32_gatherq_pd256( \
+ (__v4df)(__m256d)(a), (double const *)(m), (__v4di)(__m256i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
/// Conditionally gathers four 32-bit floating-point values, either from the
/// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
@@ -4162,10 +4093,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm_mask_i32gather_ps(a, m, i, mask, s) \
- ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
+#define _mm_mask_i32gather_ps(a, m, i, mask, s) \
+ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), (float const *)(m), \
+ (__v4si)(__m128i)(i), \
(__v4sf)(__m128)(mask), (s)))
/// Conditionally gathers eight 32-bit floating-point values, either from the
@@ -4210,11 +4140,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x float] containing the gathered values.
-#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \
- ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)(__m256)(mask), (s)))
+#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \
+ ((__m256)__builtin_ia32_gatherd_ps256( \
+ (__v8sf)(__m256)(a), (float const *)(m), (__v8si)(__m256i)(i), \
+ (__v8sf)(__m256)(mask), (s)))
/// Conditionally gathers two 32-bit floating-point values, either from the
/// 128-bit vector of [4 x float] in \a a, or from memory \a m using scaled
@@ -4261,10 +4190,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm_mask_i64gather_ps(a, m, i, mask, s) \
- ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
+#define _mm_mask_i64gather_ps(a, m, i, mask, s) \
+ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), (float const *)(m), \
+ (__v2di)(__m128i)(i), \
(__v4sf)(__m128)(mask), (s)))
/// Conditionally gathers four 32-bit floating-point values, either from the
@@ -4309,11 +4237,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \
- ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)(__m128)(mask), (s)))
+#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \
+ ((__m128)__builtin_ia32_gatherq_ps256( \
+ (__v4sf)(__m128)(a), (float const *)(m), (__v4di)(__m256i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
/// Conditionally gathers four 32-bit integer values, either from the
/// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
@@ -4357,10 +4284,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \
- ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4si)(__m128i)(i), \
+#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), (int const *)(m), \
+ (__v4si)(__m128i)(i), \
(__v4si)(__m128i)(mask), (s)))
/// Conditionally gathers eight 32-bit integer values, either from the
@@ -4405,11 +4331,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x i32] containing the gathered values.
-#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \
- ((__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
- (int const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8si)(__m256i)(mask), (s)))
+#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \
+ ((__m256i)__builtin_ia32_gatherd_d256( \
+ (__v8si)(__m256i)(a), (int const *)(m), (__v8si)(__m256i)(i), \
+ (__v8si)(__m256i)(mask), (s)))
/// Conditionally gathers two 32-bit integer values, either from the
/// 128-bit vector of [4 x i32] in \a a, or from memory \a m using scaled
@@ -4456,10 +4381,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \
- ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v2di)(__m128i)(i), \
+#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), (int const *)(m), \
+ (__v2di)(__m128i)(i), \
(__v4si)(__m128i)(mask), (s)))
/// Conditionally gathers four 32-bit integer values, either from the
@@ -4504,11 +4428,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \
- ((__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4si)(__m128i)(mask), (s)))
+#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \
+ ((__m128i)__builtin_ia32_gatherq_d256( \
+ (__v4si)(__m128i)(a), (int const *)(m), (__v4di)(__m256i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
/// Conditionally gathers two 64-bit integer values, either from the
/// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
@@ -4553,11 +4476,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
-#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \
- ((__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s)))
+#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \
+ ((__m128i)__builtin_ia32_gatherd_q( \
+ (__v2di)(__m128i)(a), (long long const *)(m), (__v4si)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
/// Conditionally gathers four 64-bit integer values, either from the
/// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
@@ -4601,11 +4523,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
-#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \
- ((__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)(__m256i)(mask), (s)))
+#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \
+ ((__m256i)__builtin_ia32_gatherd_q256( \
+ (__v4di)(__m256i)(a), (long long const *)(m), (__v4si)(__m128i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
/// Conditionally gathers two 64-bit integer values, either from the
/// 128-bit vector of [2 x i64] in \a a, or from memory \a m using scaled
@@ -4649,11 +4570,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
-#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \
- ((__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s)))
+#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \
+ ((__m128i)__builtin_ia32_gatherq_q( \
+ (__v2di)(__m128i)(a), (long long const *)(m), (__v2di)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
/// Conditionally gathers four 64-bit integer values, either from the
/// 256-bit vector of [4 x i64] in \a a, or from memory \a m using scaled
@@ -4697,11 +4617,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
-#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \
- ((__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)(__m256i)(mask), (s)))
+#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \
+ ((__m256i)__builtin_ia32_gatherq_q256( \
+ (__v4di)(__m256i)(a), (long long const *)(m), (__v4di)(__m256i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
/// Gathers two 64-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [4 x i32] in \a i.
@@ -4731,13 +4650,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x double] containing the gathered values.
-#define _mm_i32gather_pd(m, i, s) \
- ((__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s)))
+#define _mm_i32gather_pd(m, i, s) \
+ ((__m128d)__builtin_ia32_gatherd_pd( \
+ (__v2df)_mm_undefined_pd(), (double const *)(m), (__v4si)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), _mm_setzero_pd()), (s)))
/// Gathers four 64-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [4 x i32] in \a i.
@@ -4766,14 +4682,13 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x double] containing the gathered values.
-#define _mm256_i32gather_pd(m, i, s) \
- ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s)))
+#define _mm256_i32gather_pd(m, i, s) \
+ ((__m256d)__builtin_ia32_gatherd_pd256( \
+ (__v4df)_mm256_undefined_pd(), (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
/// Gathers two 64-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [2 x i64] in \a i.
@@ -4802,13 +4717,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x double] containing the gathered values.
-#define _mm_i64gather_pd(m, i, s) \
- ((__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s)))
+#define _mm_i64gather_pd(m, i, s) \
+ ((__m128d)__builtin_ia32_gatherq_pd( \
+ (__v2df)_mm_undefined_pd(), (double const *)(m), (__v2di)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), _mm_setzero_pd()), (s)))
/// Gathers four 64-bit floating-point values from memory \a m using scaled
/// indexes from the 256-bit vector of [4 x i64] in \a i.
@@ -4837,14 +4749,13 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x double] containing the gathered values.
-#define _mm256_i64gather_pd(m, i, s) \
- ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s)))
+#define _mm256_i64gather_pd(m, i, s) \
+ ((__m256d)__builtin_ia32_gatherq_pd256( \
+ (__v4df)_mm256_undefined_pd(), (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
/// Gathers four 32-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [4 x i32] in \a i.
@@ -4873,13 +4784,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm_i32gather_ps(m, i, s) \
- ((__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s)))
+#define _mm_i32gather_ps(m, i, s) \
+ ((__m128)__builtin_ia32_gatherd_ps( \
+ (__v4sf)_mm_undefined_ps(), (float const *)(m), (__v4si)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()), (s)))
/// Gathers eight 32-bit floating-point values from memory \a m using scaled
/// indexes from the 256-bit vector of [8 x i32] in \a i.
@@ -4908,14 +4816,12 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x float] containing the gathered values.
-#define _mm256_i32gather_ps(m, i, s) \
- ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
- _mm256_setzero_ps(), \
- _CMP_EQ_OQ), \
- (s)))
+#define _mm256_i32gather_ps(m, i, s) \
+ ((__m256)__builtin_ia32_gatherd_ps256( \
+ (__v8sf)_mm256_undefined_ps(), (float const *)(m), (__v8si)(__m256i)(i), \
+ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), \
+ _CMP_EQ_OQ), \
+ (s)))
/// Gathers two 32-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [2 x i64] in \a i. The upper two
@@ -4946,13 +4852,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm_i64gather_ps(m, i, s) \
- ((__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s)))
+#define _mm_i64gather_ps(m, i, s) \
+ ((__m128)__builtin_ia32_gatherq_ps( \
+ (__v4sf)_mm_undefined_ps(), (float const *)(m), (__v2di)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()), (s)))
/// Gathers four 32-bit floating-point values from memory \a m using scaled
/// indexes from the 256-bit vector of [4 x i64] in \a i.
@@ -4981,13 +4884,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x float] containing the gathered values.
-#define _mm256_i64gather_ps(m, i, s) \
- ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s)))
+#define _mm256_i64gather_ps(m, i, s) \
+ ((__m128)__builtin_ia32_gatherq_ps256( \
+ (__v4sf)_mm_undefined_ps(), (float const *)(m), (__v4di)(__m256i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()), (s)))
/// Gathers four 32-bit floating-point values from memory \a m using scaled
/// indexes from the 128-bit vector of [4 x i32] in \a i.
@@ -5016,9 +4916,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm_i32gather_epi32(m, i, s) \
- ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4si)(__m128i)(i), \
+#define _mm_i32gather_epi32(m, i, s) \
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4si)(__m128i)(i), \
(__v4si)_mm_set1_epi32(-1), (s)))
/// Gathers eight 32-bit floating-point values from memory \a m using scaled
@@ -5048,10 +4948,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [8 x i32] containing the gathered values.
-#define _mm256_i32gather_epi32(m, i, s) \
- ((__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
- (int const *)(m), (__v8si)(__m256i)(i), \
- (__v8si)_mm256_set1_epi32(-1), (s)))
+#define _mm256_i32gather_epi32(m, i, s) \
+ ((__m256i)__builtin_ia32_gatherd_d256( \
+ (__v8si)_mm256_undefined_si256(), (int const *)(m), \
+ (__v8si)(__m256i)(i), (__v8si)_mm256_set1_epi32(-1), (s)))
/// Gathers two 32-bit integer values from memory \a m using scaled indexes
/// from the 128-bit vector of [2 x i64] in \a i. The upper two elements
@@ -5082,9 +4982,9 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm_i64gather_epi32(m, i, s) \
- ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v2di)(__m128i)(i), \
+#define _mm_i64gather_epi32(m, i, s) \
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v2di)(__m128i)(i), \
(__v4si)_mm_set1_epi32(-1), (s)))
/// Gathers four 32-bit integer values from memory \a m using scaled indexes
@@ -5114,10 +5014,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [4 x i32] containing the gathered values.
-#define _mm256_i64gather_epi32(m, i, s) \
- ((__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4di)(__m256i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s)))
+#define _mm256_i64gather_epi32(m, i, s) \
+ ((__m128i)__builtin_ia32_gatherq_d256( \
+ (__v4si)_mm_undefined_si128(), (int const *)(m), (__v4di)(__m256i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
/// Gathers two 64-bit integer values from memory \a m using scaled indexes
/// from the 128-bit vector of [4 x i32] in \a i.
@@ -5147,11 +5047,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
-#define _mm_i32gather_epi64(m, i, s) \
- ((__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s)))
+#define _mm_i32gather_epi64(m, i, s) \
+ ((__m128i)__builtin_ia32_gatherd_q( \
+ (__v2di)_mm_undefined_si128(), (long long const *)(m), \
+ (__v4si)(__m128i)(i), (__v2di)_mm_set1_epi64x(-1), (s)))
/// Gathers four 64-bit integer values from memory \a m using scaled indexes
/// from the 128-bit vector of [4 x i32] in \a i.
@@ -5180,11 +5079,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
-#define _mm256_i32gather_epi64(m, i, s) \
- ((__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s)))
+#define _mm256_i32gather_epi64(m, i, s) \
+ ((__m256i)__builtin_ia32_gatherd_q256( \
+ (__v4di)_mm256_undefined_si256(), (long long const *)(m), \
+ (__v4si)(__m128i)(i), (__v4di)_mm256_set1_epi64x(-1), (s)))
/// Gathers two 64-bit integer values from memory \a m using scaled indexes
/// from the 128-bit vector of [2 x i64] in \a i.
@@ -5213,11 +5111,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 128-bit vector of [2 x i64] containing the gathered values.
-#define _mm_i64gather_epi64(m, i, s) \
- ((__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s)))
+#define _mm_i64gather_epi64(m, i, s) \
+ ((__m128i)__builtin_ia32_gatherq_q( \
+ (__v2di)_mm_undefined_si128(), (long long const *)(m), \
+ (__v2di)(__m128i)(i), (__v2di)_mm_set1_epi64x(-1), (s)))
/// Gathers four 64-bit integer values from memory \a m using scaled indexes
/// from the 256-bit vector of [4 x i64] in \a i.
@@ -5246,11 +5143,10 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
/// A literal constant scale factor for the indexes in \a i. Must be
/// 1, 2, 4, or 8.
/// \returns A 256-bit vector of [4 x i64] containing the gathered values.
-#define _mm256_i64gather_epi64(m, i, s) \
- ((__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s)))
+#define _mm256_i64gather_epi64(m, i, s) \
+ ((__m256i)__builtin_ia32_gatherq_q256( \
+ (__v4di)_mm256_undefined_si256(), (long long const *)(m), \
+ (__v4di)(__m256i)(i), (__v4di)_mm256_set1_epi64x(-1), (s)))
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 7f69019e01b06..dbdd4898d900e 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -3090,8 +3090,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a,
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi8(__m128i __a, __m128i __b) {
return (__m128i)((__v16qi)__a == (__v16qi)__b);
}
@@ -3109,8 +3109,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi8(__m128i __
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi16(__m128i __a, __m128i __b) {
return (__m128i)((__v8hi)__a == (__v8hi)__b);
}
@@ -3128,8 +3128,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi16(__m128i _
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4si)__a == (__v4si)__b);
}
@@ -3148,8 +3148,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi32(__m128i _
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi8(__m128i __a, __m128i __b) {
/* This function always performs a signed comparison, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
return (__m128i)((__v16qs)__a > (__v16qs)__b);
@@ -3170,8 +3170,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi8(__m128i __
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi16(__m128i __a, __m128i __b) {
return (__m128i)((__v8hi)__a > (__v8hi)__b);
}
@@ -3190,8 +3190,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi16(__m128i _
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4si)__a > (__v4si)__b);
}
@@ -3210,8 +3210,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi32(__m128i _
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi8(__m128i __a, __m128i __b) {
return _mm_cmpgt_epi8(__b, __a);
}
@@ -3230,8 +3230,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi8(__m128i __
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi16(__m128i __a, __m128i __b) {
return _mm_cmpgt_epi16(__b, __a);
}
@@ -3250,8 +3250,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi16(__m128i _
/// \param __b
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_epi32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_epi32(__m128i __a, __m128i __b) {
return _mm_cmpgt_epi32(__b, __a);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 3f44c786fb75f..f68dd7ed2bcc9 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1211,8 +1211,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
/// \param __V2
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_epi64(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) {
return (__m128i)((__v2di)__V1 == (__v2di)__V2);
}
@@ -2338,8 +2338,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
/// \param __V2
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the comparison results.
-static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_epi64(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) {
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
>From d6c72caab363cf254e8ea899ed76f9f0a024c0ff Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 16:57:38 +0530
Subject: [PATCH 4/7] resolved conflict: added CONSTEXPR support back to abs
---
clang/lib/Headers/avx2intrin.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 7d617e519d7b9..35ca5be1d8cdd 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -112,7 +112,8 @@
/// \param __a
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi8(__m256i __a) {
return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
}
>From 21fee1bcda3777c16d3e018766fd211322849f45 Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 16:59:59 +0530
Subject: [PATCH 5/7] resolved conflict: added CONSTEXPR support back to abs
---
clang/lib/Headers/avx2intrin.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 35ca5be1d8cdd..26c665eea0c99 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -128,7 +128,7 @@ _mm256_abs_epi8(__m256i __a) {
/// \param __a
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi16(__m256i __a) {
return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
}
@@ -143,7 +143,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) {
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi32(__m256i __a) {
return (__m256i)__builtin_elementwise_abs((__v8si)__a);
}
>From 399b18277beffcef85a5bb742284d7e90c3a654e Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 17:00:55 +0530
Subject: [PATCH 6/7] formatted resolved conflicts
---
clang/lib/Headers/avx2intrin.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 26c665eea0c99..4b698bf0c6135 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -128,7 +128,8 @@ _mm256_abs_epi8(__m256i __a) {
/// \param __a
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi16(__m256i __a) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi16(__m256i __a) {
return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
}
@@ -143,7 +144,8 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi16(__m25
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_abs_epi32(__m256i __a) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_abs_epi32(__m256i __a) {
return (__m256i)__builtin_elementwise_abs((__v8si)__a);
}
>From 65004c2c687adafa9d1fb0a9685c647c59585f80 Mon Sep 17 00:00:00 2001
From: smoke-y <SnowSneeze at protonmail.com>
Date: Thu, 28 Aug 2025 17:34:46 +0530
Subject: [PATCH 7/7] formatted and changed i->s
---
clang/lib/Headers/avx2intrin.h | 6 +++---
clang/test/CodeGen/X86/avx2-builtins.c | 5 ++++-
clang/test/CodeGen/X86/sse2-builtins.c | 10 ++++++++--
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 4b698bf0c6135..58297bde14184 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -97,9 +97,9 @@
/// An unsigned immediate value specifying the starting positions of the
/// bytes to operate on.
/// \returns A 256-bit vector of [16 x i16] containing the result.
-#define _mm256_mpsadbw_epu8(X, Y, M) \
- ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
- (__v32qi)(__m256i)(Y), (int)(M)))
+#define _mm256_mpsadbw_epu8(X, Y, M) \
+ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M)))
/// Computes the absolute value of each signed byte in the 256-bit integer
/// vector \a __a and returns each value in the corresponding byte of
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 7e25da35020c5..1629c20659597 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -318,7 +318,10 @@ __m256i test_mm256_cmpgt_epi8(__m256i a, __m256i b) {
// CHECK: icmp sgt <32 x i8>
return _mm256_cmpgt_epi8(a, b);
}
-TEST_CONSTEXPR(match_v32qi(_mm256_cmpgt_epi8((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v32qi){10, 2, 6, 5, 30, 7, 8, 1, 20, 3, 12, 8, 25, 10, 9, 2, 10, 2, 6, 5, 30, 7, 8, 1, 20, 3, 12, 8, 25, 10, 9, 2}), 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1));
+TEST_CONSTEXPR(match_v32qi(_mm256_cmpgt_epi8(
+ (__m256i)(__v32qs){1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16},
+ (__m256i)(__v32qs){10, -2, 6, -5, 30, -7, 8, -1, 20, -3, 12, -8, 25, -10, 9, -2, -10, 2, -6, 5, -30, 7, -8, 1, -20, 3, -12, 8, -25, 10, -9, 2}),
+ 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1));
__m256i test_mm256_cmpgt_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_cmpgt_epi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 17030d0b7920e..81fe8136220c8 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -296,7 +296,10 @@ __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
// CHECK: icmp sgt <16 x i8>
return _mm_cmpgt_epi8(A, B);
}
-TEST_CONSTEXPR(match_v16qi(_mm_cmpgt_epi8((__m128i)(__v16qi){15,2,8,4,12,6,20,8,25,10,30,12,35,14,40,16}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), -1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0));
+TEST_CONSTEXPR(match_v16qi(_mm_cmpgt_epi8(
+ (__m128i)(__v16qs){15,-2,8,-4,12,6,-20,8,25,-10,30,12,-35,14,40,-16},
+ (__m128i)(__v16qs){10,-2,6,-4,5,12,-14,8,9,-20,22,12,-26,14,30,-16}),
+ -1, 0, -1, 0, -1, 0, 0, 0, -1, -1, -1, 0, 0, 0, -1, 0));
__m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpgt_epi16
@@ -349,7 +352,10 @@ __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
// CHECK: icmp sgt <16 x i8>
return _mm_cmplt_epi8(A, B);
}
-TEST_CONSTEXPR(match_v16qi(_mm_cmplt_epi8((__m128i)(__v16qi){5,2,3,4,1,6,7,8,9,5,11,12,13,10,15,8}, (__m128i)(__v16qi){10,2,6,4,5,12,14,8,9,20,22,12,26,14,30,16}), -1, 0, -1, 0, -1, -1, -1, 0, 0, -1, -1, 0, -1, -1, -1, -1));
+TEST_CONSTEXPR(match_v16qi(_mm_cmplt_epi8(
+ (__m128i)(__v16qs){15,-2,8,-4,12,6,-20,8,25,-10,30,12,-35,14,40,-16},
+ (__m128i)(__v16qs){10,-2,6,-4,5,12,-14,8,9,-20,22,12,-26,14,30,-16}),
+ 0, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0));
__m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmplt_epi16
More information about the cfe-commits
mailing list