[clang] [Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer arithmetic intrinsics to be used in constexpr (PR #157582)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 9 09:22:22 PDT 2025
https://github.com/donneypr updated https://github.com/llvm/llvm-project/pull/157582
>From 0e67c0217dada580d43c30e03180b0977a6cce98 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 19:54:43 -0400
Subject: [PATCH 1/7] [clang][x86][headers] Make SSE2 add/sub intrinsics
constexpr
---
clang/lib/Headers/emmintrin.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..c99c85f26c6d1 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,7 +2060,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
/// A 128-bit vector of [16 x i8].
/// \returns A 128-bit vector of [16 x i8] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v16qu)__a + (__v16qu)__b);
}
@@ -2081,7 +2081,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
/// A 128-bit vector of [8 x i16].
/// \returns A 128-bit vector of [8 x i16] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hu)__a + (__v8hu)__b);
}
@@ -2499,7 +2499,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v16qu)__a - (__v16qu)__b);
}
@@ -2516,7 +2516,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hu)__a - (__v8hu)__b);
}
>From 6f109906308620d8674cc7e69c3cee09495fc34f Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 20:05:11 -0400
Subject: [PATCH 2/7] [clang][x86][headers] Make AVX2 add/sub intrinsics
constexpr
---
clang/lib/Headers/avx2intrin.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..5c8c2996229c6 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
/// \param __b
/// A 256-bit integer vector containing one of the source operands.
/// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qu)__a + (__v32qu)__b);
@@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hu)__a + (__v16hu)__b);
@@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a + (__v8su)__b);
@@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a + (__v4du)__b);
@@ -2462,7 +2462,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
/// \param __b
/// A 256-bit integer vector containing the subtrahends.
/// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qu)__a - (__v32qu)__b);
@@ -2489,7 +2489,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing the subtrahends.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hu)__a - (__v16hu)__b);
@@ -2515,7 +2515,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing the subtrahends.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a - (__v8su)__b);
@@ -2541,7 +2541,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing the subtrahends.
/// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a - (__v4du)__b);
>From 94e4e4d6ece5b24d9d494f1b0874c179bacb1b5c Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 20:19:28 -0400
Subject: [PATCH 3/7] [clang][x86][headers] Make AVX-512 epi64 add/sub
intrinsics constexpr Fixes #152490
---
clang/lib/Headers/avx512fintrin.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 67499fd83a089..1d3aeb284b00c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A + (__v8du) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A - (__v8du) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A + (__v16su) __B);
@@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A - (__v16su) __B);
>From b21b4ffb4bd30624d4891f068002bccfe01ec5bc Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 21:48:53 -0400
Subject: [PATCH 4/7] [clang][x86][headers] Make AVX-512F masked epi32 add/sub
constexpr Fixes llvm/llvm-project#152490
---
clang/lib/Headers/avx512fintrin.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 1d3aeb284b00c..6dc70b54f2fd9 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -903,7 +903,7 @@ _mm512_add_epi32 (__m512i __A, __m512i __B)
return (__m512i) ((__v16su) __A + (__v16su) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -911,7 +911,7 @@ _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -925,7 +925,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i __B)
return (__m512i) ((__v16su) __A - (__v16su) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -933,7 +933,7 @@ _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
(__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
>From 19d2c77b9afa4a51d43ce3beabb1f92185b1d0f0 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 21:53:07 -0400
Subject: [PATCH 5/7] [clang][x86][headers] Make AVX-512BW masked add/sub
(epi8/epi16) constexpr
Fixes llvm/llvm-project#152490.
---
clang/lib/Headers/avx512bwintrin.h | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 31e0a2242240c..cfb420f9ac1f9 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -369,76 +369,76 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi8 (__m512i __A, __m512i __B) {
return (__m512i) ((__v64qu) __A + (__v64qu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_add_epi8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_add_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi8 (__m512i __A, __m512i __B) {
return (__m512i) ((__v64qu) __A - (__v64qu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_sub_epi8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_sub_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hu) __A + (__v32hu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_add_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_add_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hu) __A - (__v32hu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sub_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_sub_epi16(__A, __B),
>From 80edfc4ee1984b065ec3cc17090a7bbd419bb7e3 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Tue, 9 Sep 2025 07:51:26 -0400
Subject: [PATCH 6/7] clang-format: touched lines in x86 add/sub headers for
#152490
---
clang/lib/Headers/avx2intrin.h | 24 ++++++++--------------
clang/lib/Headers/avx512bwintrin.h | 10 ++++-----
clang/lib/Headers/avx512fintrin.h | 33 ++++++++++--------------------
clang/lib/Headers/emmintrin.h | 16 +++++++--------
4 files changed, 32 insertions(+), 51 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 5c8c2996229c6..fdd464e0195c8 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -280,8 +280,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
/// A 256-bit integer vector containing one of the source operands.
/// \returns A 256-bit integer vector containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi8(__m256i __a, __m256i __b)
-{
+_mm256_add_epi8(__m256i __a, __m256i __b) {
return (__m256i)((__v32qu)__a + (__v32qu)__b);
}
@@ -299,8 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi16(__m256i __a, __m256i __b)
-{
+_mm256_add_epi16(__m256i __a, __m256i __b) {
return (__m256i)((__v16hu)__a + (__v16hu)__b);
}
@@ -318,8 +316,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi32(__m256i __a, __m256i __b)
-{
+_mm256_add_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8su)__a + (__v8su)__b);
}
@@ -337,8 +334,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x i64] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the sums.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi64(__m256i __a, __m256i __b)
-{
+_mm256_add_epi64(__m256i __a, __m256i __b) {
return (__m256i)((__v4du)__a + (__v4du)__b);
}
@@ -2463,8 +2459,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
/// A 256-bit integer vector containing the subtrahends.
/// \returns A 256-bit integer vector containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi8(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi8(__m256i __a, __m256i __b) {
return (__m256i)((__v32qu)__a - (__v32qu)__b);
}
@@ -2490,8 +2485,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] containing the subtrahends.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi16(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi16(__m256i __a, __m256i __b) {
return (__m256i)((__v16hu)__a - (__v16hu)__b);
}
@@ -2516,8 +2510,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] containing the subtrahends.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi32(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8su)__a - (__v8su)__b);
}
@@ -2542,8 +2535,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x i64] containing the subtrahends.
/// \returns A 256-bit vector of [4 x i64] containing the differences.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi64(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi64(__m256i __a, __m256i __b) {
return (__m256i)((__v4du)__a - (__v4du)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index cfb420f9ac1f9..93d8b085900e7 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
-static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi8 (__m512i __A, __m512i __B) {
+static __inline__ __m512i
+ __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) {
return (__m512i) ((__v64qu) __A + (__v64qu) __B);
}
@@ -389,7 +389,7 @@ _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi8 (__m512i __A, __m512i __B) {
+_mm512_sub_epi8(__m512i __A, __m512i __B) {
return (__m512i) ((__v64qu) __A - (__v64qu) __B);
}
@@ -408,7 +408,7 @@ _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi16 (__m512i __A, __m512i __B) {
+_mm512_add_epi16(__m512i __A, __m512i __B) {
return (__m512i) ((__v32hu) __A + (__v32hu) __B);
}
@@ -427,7 +427,7 @@ _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi16 (__m512i __A, __m512i __B) {
+_mm512_sub_epi16(__m512i __A, __m512i __B) {
return (__m512i) ((__v32hu) __A - (__v32hu) __B);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 6dc70b54f2fd9..f53bd13f21776 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -860,82 +860,71 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_add_epi64(__A, __B),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_add_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi64 (__m512i __A, __m512i __B)
-{
+_mm512_sub_epi64(__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A - (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sub_epi64(__A, __B),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_sub_epi64(__A, __B),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi32 (__m512i __A, __m512i __B)
-{
+_mm512_add_epi32(__m512i __A, __m512i __B) {
return (__m512i) ((__v16su) __A + (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_add_epi32(__A, __B),
(__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_add_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi32 (__m512i __A, __m512i __B)
-{
+_mm512_sub_epi32(__m512i __A, __m512i __B) {
return (__m512i) ((__v16su) __A - (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sub_epi32(__A, __B),
(__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_sub_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index c99c85f26c6d1..3999c2e242410 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,8 +2060,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
/// A 128-bit vector of [16 x i8].
/// \returns A 128-bit vector of [16 x i8] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) {
return (__m128i)((__v16qu)__a + (__v16qu)__b);
}
@@ -2081,8 +2081,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
/// A 128-bit vector of [8 x i16].
/// \returns A 128-bit vector of [8 x i16] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) {
return (__m128i)((__v8hu)__a + (__v8hu)__b);
}
@@ -2499,8 +2499,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) {
return (__m128i)((__v16qu)__a - (__v16qu)__b);
}
@@ -2516,8 +2516,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) {
return (__m128i)((__v8hu)__a - (__v8hu)__b);
}
>From 5b0b590f848b630fd1f44abd3958d1a723d126ef Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Tue, 9 Sep 2025 12:21:56 -0400
Subject: [PATCH 7/7] X86: add constexpr tests for add/sub intrinsics
---
clang/test/CodeGen/X86/avx2-builtins.c | 66 ++++++++++++++++++
clang/test/CodeGen/X86/avx512bw-builtins.c | 60 ++++++++++++++++
clang/test/CodeGen/X86/avx512f-builtins.c | 80 ++++++++++++++++++++++
clang/test/CodeGen/X86/sse2-builtins.c | 28 ++++++++
4 files changed, 234 insertions(+)
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 724a5f693f9fe..ddbfd93b7f5f6 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -48,24 +48,57 @@ __m256i test_mm256_add_epi8(__m256i a, __m256i b) {
return _mm256_add_epi8(a, b);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_add_epi8(
+ (__m256i)(__v32qi){
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 },
+ (__m256i)(__v32qi){
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
+
__m256i test_mm256_add_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_add_epi16
// CHECK: add <16 x i16>
return _mm256_add_epi16(a, b);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_add_epi16(
+ (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 },
+ (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ 1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31));
+
__m256i test_mm256_add_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_add_epi32
// CHECK: add <8 x i32>
return _mm256_add_epi32(a, b);
}
+TEST_CONSTEXPR(
+ match_v8si(
+ _mm256_add_epi32(
+ (__m256i)(__v8si){1,2,3,4,5,6,7,8},
+ (__m256i)(__v8si){8,7,6,5,4,3,2,1}),
+ 9,9,9,9,9,9,9,9));
+
__m256i test_mm256_add_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_add_epi64
// CHECK: add <4 x i64>
return _mm256_add_epi64(a, b);
}
+TEST_CONSTEXPR(
+ match_v4di(
+ _mm256_add_epi64(
+ (__m256i)(__v4di){10,20,30,40},
+ (__m256i)(__v4di){ 1, 3, 5, 7 }),
+ 11,23,35,47));
+
__m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_adds_epi8
// CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
@@ -1358,24 +1391,57 @@ __m256i test_mm256_sub_epi8(__m256i a, __m256i b) {
return _mm256_sub_epi8(a, b);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_sub_epi8(
+ (__m256i)(__v32qi){
+ 0,1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 },
+ (__m256i)(__v32qi){
+ 1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ -1,0,1,2,3,4,5,6, 7, 8, 9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30));
+
__m256i test_mm256_sub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sub_epi16
// CHECK: sub <16 x i16>
return _mm256_sub_epi16(a, b);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_sub_epi16(
+ (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 },
+ (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29));
+
__m256i test_mm256_sub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sub_epi32
// CHECK: sub <8 x i32>
return _mm256_sub_epi32(a, b);
}
+TEST_CONSTEXPR(
+ match_v8si(
+ _mm256_sub_epi32(
+ (__m256i)(__v8si){10,20,30,40,50,60,70,80},
+ (__m256i)(__v8si){ 1, 2, 3, 4, 5, 6, 7, 8}),
+ 9,18,27,36,45,54,63,72));
+
__m256i test_mm256_sub_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sub_epi64
// CHECK: sub <4 x i64>
return _mm256_sub_epi64(a, b);
}
+TEST_CONSTEXPR(
+ match_v4di(
+ _mm256_sub_epi64(
+ (__m256i)(__v4di){10,20,30,40},
+ (__m256i)(__v4di){ 1, 3, 5, 7 }),
+ 9,17,25,33));
+
__m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_subs_epi8
// CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 1f67a9e4d2e53..96a5a6438e004 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -752,6 +752,24 @@ __m512i test_mm512_add_epi8 (__m512i __A, __m512i __B) {
return _mm512_add_epi8(__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_add_epi8(
+ (__m512i)(__v64qi){
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 },
+ (__m512i)(__v64qi){
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
+ 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,
+ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,
+ 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64));
+
__m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mask_add_epi8
//CHECK: add <64 x i8> %{{.*}}, %{{.*}}
@@ -772,6 +790,24 @@ __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) {
return _mm512_sub_epi8(__A, __B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_sub_epi8(
+ (__m512i)(__v64qi){
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 },
+ (__m512i)(__v64qi){
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+ -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+ 31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
+ 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62));
+
__m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mask_sub_epi8
//CHECK: sub <64 x i8> %{{.*}}, %{{.*}}
@@ -792,6 +828,18 @@ __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) {
return _mm512_add_epi16(__A, __B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_add_epi16(
+ (__m512i)(__v32hi){
+ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30,
+ 32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 },
+ (__m512i)(__v32hi){
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ 1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31,
+ 33,35,37,39,41,43,45,47, 49,51,53,55,57,59,61,63));
+
__m512i test_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mask_add_epi16
//CHECK: add <32 x i16> %{{.*}}, %{{.*}}
@@ -812,6 +860,18 @@ __m512i test_mm512_sub_epi16 (__m512i __A, __m512i __B) {
return _mm512_sub_epi16(__A, __B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_sub_epi16(
+ (__m512i)(__v32hi){
+ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30,
+ 32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 },
+ (__m512i)(__v32hi){
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29,
+ 31,33,35,37,39,41,43,45, 47,49,51,53,55,57,59,61));
+
__m512i test_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mask_sub_epi16
//CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 10d464c429a43..50c59a4edd9f5 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3006,6 +3006,14 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
return _mm512_maskz_sub_epi32(__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_maskz_sub_epi32(
+ K,
+ (__m512i)(__v16si){10,11,12,13,14,15,16,17, 100,200,300,400,500,600,700,800},
+ (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2}),
+ 9,9,9,9,9,9,9,9, 0,0,0,0,0,0,0,0));
+
__m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: test_mm512_mask_sub_epi32
@@ -3014,12 +3022,24 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
return _mm512_mask_sub_epi32(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_mask_sub_epi32(SRC, K, A, B),
+ 9,9,9,9,9,9,9,9, 42,42,42,42,42,42,42,42));
+
__m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_sub_epi32
//CHECK: sub <16 x i32>
return _mm512_sub_epi32(__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_sub_epi32(
+ (__m512i)(__v16si){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25},
+ (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16}),
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
+
__m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_sub_epi64
//CHECK: sub <8 x i64> %{{.*}}, %{{.*}}
@@ -3027,6 +3047,14 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
return _mm512_maskz_sub_epi64(__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_maskz_sub_epi64(
+ K,
+ (__m512i)(__v8di){100,200,300,400,500,600,700,800},
+ (__m512i)(__v8di){ 1, 2, 3, 4, 5, 6, 7, 8}),
+ 99,198,297,396, 0,0,0,0));
+
__m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: test_mm512_mask_sub_epi64
@@ -3035,12 +3063,24 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
return _mm512_mask_sub_epi64(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_mask_sub_epi64(SRC, K, A, B),
+ 99,198,297,396, -1,-1,-1,-1));
+
__m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_sub_epi64
//CHECK: sub <8 x i64>
return _mm512_sub_epi64(__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_sub_epi64(
+ (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+ (__m512i)(__v8di){ 1, 3, 5, 7, 9,11,13,15}),
+ 9,17,25,33,41,49,57,65));
+
__m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_add_epi32
//CHECK: add <16 x i32> %{{.*}}, %{{.*}}
@@ -3048,6 +3088,14 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
return _mm512_maskz_add_epi32(__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_maskz_add_epi32(
+ K,
+ (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 10,20,30,40,50,60,70,80 },
+ (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 9, 8, 7, 6, 5, 4, 3, 2 }),
+ 1,2,3,4,5,6,7,8, 0,0,0,0,0,0,0,0));
+
__m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: test_mm512_mask_add_epi32
@@ -3056,12 +3104,24 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
return _mm512_mask_add_epi32(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_mask_add_epi32(SRC, K, A, B),
+ 1,2,3,4,5,6,7,8, 100,100,100,100,100,100,100,100));
+
__m512i test_mm512_add_epi32(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_add_epi32
//CHECK: add <16 x i32>
return _mm512_add_epi32(__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16si(
+ _mm512_add_epi32(
+ (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+ (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16));
+
__m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_add_epi64
//CHECK: add <8 x i64> %{{.*}}, %{{.*}}
@@ -3069,6 +3129,14 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
return _mm512_maskz_add_epi64(__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_maskz_add_epi64(
+ K,
+ (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+ (__m512i)(__v8di){ 1, 2, 3, 4, 1, 2, 3, 4}),
+ 11,22,33,44, 0,0,0,0));
+
__m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: test_mm512_mask_add_epi64
@@ -3077,12 +3145,24 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
return _mm512_mask_add_epi64(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_mask_add_epi64(SRC, K, A, B),
+ 11,22,33,44, 100,100,100,100));
+
__m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_add_epi64
//CHECK: add <8 x i64>
return _mm512_add_epi64(__A,__B);
}
+TEST_CONSTEXPR(
+ match_v8di(
+ _mm512_add_epi64(
+ (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+ (__m512i)(__v8di){ 1, 1, 1, 1, 1, 1, 1, 1}),
+ 11,21,31,41,51,61,71,81));
+
__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mul_epi32
//CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index f5de5069c0046..ee3bfce119b2e 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -32,12 +32,26 @@ __m128i test_mm_add_epi8(__m128i A, __m128i B) {
return _mm_add_epi8(A, B);
}
+TEST_CONSTEXPR(
+ match_v16qi(
+ _mm_add_epi8(
+ (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+ (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16));
+
__m128i test_mm_add_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_add_epi16
// CHECK: add <8 x i16>
return _mm_add_epi16(A, B);
}
+TEST_CONSTEXPR(
+ match_v8hi(
+ _mm_add_epi16(
+ (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 },
+ (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }),
+ 1, 3, 5, 7, 9, 11, 13, 15));
+
__m128i test_mm_add_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_add_epi32
// CHECK: add <4 x i32>
@@ -1713,12 +1727,26 @@ __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
return _mm_sub_epi8(A, B);
}
+TEST_CONSTEXPR(
+ match_v16qi(
+ _mm_sub_epi8(
+ (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+ (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14));
+
__m128i test_mm_sub_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sub_epi16
// CHECK: sub <8 x i16>
return _mm_sub_epi16(A, B);
}
+TEST_CONSTEXPR(
+ match_v8hi(
+ _mm_sub_epi16(
+ (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 },
+ (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }),
+ -1, 1, 3, 5, 7, 9, 11, 13));
+
__m128i test_mm_sub_epi32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sub_epi32
// CHECK: sub <4 x i32>
More information about the cfe-commits
mailing list