[clang] [Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer arithmetic intrinsics to be used in constexpr (PR #157582)

via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 9 10:47:48 PDT 2025


https://github.com/donneypr updated https://github.com/llvm/llvm-project/pull/157582

>From 0e67c0217dada580d43c30e03180b0977a6cce98 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 19:54:43 -0400
Subject: [PATCH 1/8] [clang][x86][headers] Make SSE2 add/sub intrinsics
 constexpr

---
 clang/lib/Headers/emmintrin.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..c99c85f26c6d1 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,7 +2060,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit vector of [16 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
                                                           __m128i __b) {
   return (__m128i)((__v16qu)__a + (__v16qu)__b);
 }
@@ -2081,7 +2081,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
 ///    A 128-bit vector of [8 x i16].
 /// \returns A 128-bit vector of [8 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a,
                                                            __m128i __b) {
   return (__m128i)((__v8hu)__a + (__v8hu)__b);
 }
@@ -2499,7 +2499,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
                                                           __m128i __b) {
   return (__m128i)((__v16qu)__a - (__v16qu)__b);
 }
@@ -2516,7 +2516,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a,
                                                            __m128i __b) {
   return (__m128i)((__v8hu)__a - (__v8hu)__b);
 }

>From 6f109906308620d8674cc7e69c3cee09495fc34f Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 20:05:11 -0400
Subject: [PATCH 2/8] [clang][x86][headers] Make AVX2 add/sub intrinsics
 constexpr

---
 clang/lib/Headers/avx2intrin.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..5c8c2996229c6 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 /// \param __b
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
@@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
@@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
@@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
@@ -2462,7 +2462,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
 /// \param __b
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a - (__v32qu)__b);
@@ -2489,7 +2489,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a - (__v16hu)__b);
@@ -2515,7 +2515,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing the subtrahends.
 /// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a - (__v8su)__b);
@@ -2541,7 +2541,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing the subtrahends.
 /// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a - (__v4du)__b);

>From 94e4e4d6ece5b24d9d494f1b0874c179bacb1b5c Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 20:19:28 -0400
Subject: [PATCH 3/8] [clang][x86][headers] Make AVX-512 epi64 add/sub
 intrinsics constexpr Fixes #152490

---
 clang/lib/Headers/avx512fintrin.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 67499fd83a089..1d3aeb284b00c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A + (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_add_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
@@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A - (__v16su) __B);

>From b21b4ffb4bd30624d4891f068002bccfe01ec5bc Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 21:48:53 -0400
Subject: [PATCH 4/8] [clang][x86][headers] Make AVX-512F masked epi32 add/sub
 constexpr Fixes llvm/llvm-project#152490

---
 clang/lib/Headers/avx512fintrin.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 1d3aeb284b00c..6dc70b54f2fd9 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -903,7 +903,7 @@ _mm512_add_epi32 (__m512i __A, __m512i __B)
   return (__m512i) ((__v16su) __A + (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -911,7 +911,7 @@ _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -925,7 +925,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i __B)
   return (__m512i) ((__v16su) __A - (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -933,7 +933,7 @@ _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

>From 19d2c77b9afa4a51d43ce3beabb1f92185b1d0f0 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Mon, 8 Sep 2025 21:53:07 -0400
Subject: [PATCH 5/8] [clang][x86][headers] Make AVX-512BW masked add/sub
 (epi8/epi16) constexpr

Fixes llvm/llvm-project#152490.
---
 clang/lib/Headers/avx512bwintrin.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 31e0a2242240c..cfb420f9ac1f9 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -369,76 +369,76 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
     _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_add_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A + (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_add_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_add_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A - (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_sub_epi8(__A, __B),
                                              (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
                                              (__v64qi)_mm512_sub_epi8(__A, __B),
                                              (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_add_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A + (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_add_epi16(__A, __B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_add_epi16(__A, __B),
                                              (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A - (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_sub_epi16(__A, __B),
                                              (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
                                              (__v32hi)_mm512_sub_epi16(__A, __B),

>From 80edfc4ee1984b065ec3cc17090a7bbd419bb7e3 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Tue, 9 Sep 2025 07:51:26 -0400
Subject: [PATCH 6/8] clang-format: touched lines in x86 add/sub headers for
 #152490

---
 clang/lib/Headers/avx2intrin.h     | 24 ++++++++--------------
 clang/lib/Headers/avx512bwintrin.h | 10 ++++-----
 clang/lib/Headers/avx512fintrin.h  | 33 ++++++++++--------------------
 clang/lib/Headers/emmintrin.h      | 16 +++++++--------
 4 files changed, 32 insertions(+), 51 deletions(-)

diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 5c8c2996229c6..fdd464e0195c8 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -280,8 +280,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi8(__m256i __a, __m256i __b)
-{
+_mm256_add_epi8(__m256i __a, __m256i __b) {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
@@ -299,8 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi16(__m256i __a, __m256i __b)
-{
+_mm256_add_epi16(__m256i __a, __m256i __b) {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
@@ -318,8 +316,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi32(__m256i __a, __m256i __b)
-{
+_mm256_add_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
@@ -337,8 +334,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_add_epi64(__m256i __a, __m256i __b)
-{
+_mm256_add_epi64(__m256i __a, __m256i __b) {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
@@ -2463,8 +2459,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi8(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi8(__m256i __a, __m256i __b) {
   return (__m256i)((__v32qu)__a - (__v32qu)__b);
 }
 
@@ -2490,8 +2485,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi16(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi16(__m256i __a, __m256i __b) {
   return (__m256i)((__v16hu)__a - (__v16hu)__b);
 }
 
@@ -2516,8 +2510,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [8 x i32] containing the subtrahends.
 /// \returns A 256-bit vector of [8 x i32] containing the differences.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi32(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi32(__m256i __a, __m256i __b) {
   return (__m256i)((__v8su)__a - (__v8su)__b);
 }
 
@@ -2542,8 +2535,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
 ///    A 256-bit vector of [4 x i64] containing the subtrahends.
 /// \returns A 256-bit vector of [4 x i64] containing the differences.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_sub_epi64(__m256i __a, __m256i __b)
-{
+_mm256_sub_epi64(__m256i __a, __m256i __b) {
   return (__m256i)((__v4du)__a - (__v4du)__b);
 }
 
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index cfb420f9ac1f9..93d8b085900e7 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
     _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi8 (__m512i __A, __m512i __B) {
+static __inline__ __m512i
+    __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A + (__v64qu) __B);
 }
 
@@ -389,7 +389,7 @@ _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi8 (__m512i __A, __m512i __B) {
+_mm512_sub_epi8(__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A - (__v64qu) __B);
 }
 
@@ -408,7 +408,7 @@ _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi16 (__m512i __A, __m512i __B) {
+_mm512_add_epi16(__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A + (__v32hu) __B);
 }
 
@@ -427,7 +427,7 @@ _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi16 (__m512i __A, __m512i __B) {
+_mm512_sub_epi16(__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A - (__v32hu) __B);
 }
 
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 6dc70b54f2fd9..f53bd13f21776 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -860,82 +860,71 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_add_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_add_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi64 (__m512i __A, __m512i __B)
-{
+_mm512_sub_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_sub_epi64(__A, __B),
                                              (__v8di)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_sub_epi64(__A, __B),
                                              (__v8di)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_add_epi32 (__m512i __A, __m512i __B)
-{
+_mm512_add_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_add_epi32(__A, __B),
                                              (__v16si)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_add_epi32(__A, __B),
                                              (__v16si)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sub_epi32 (__m512i __A, __m512i __B)
-{
+_mm512_sub_epi32(__m512i __A, __m512i __B) {
   return (__m512i) ((__v16su) __A - (__v16su) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_sub_epi32(__A, __B),
                                              (__v16si)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
-{
+_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                              (__v16si)_mm512_sub_epi32(__A, __B),
                                              (__v16si)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index c99c85f26c6d1..3999c2e242410 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,8 +2060,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit vector of [16 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a + (__v16qu)__b);
 }
 
@@ -2081,8 +2081,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
 ///    A 128-bit vector of [8 x i16].
 /// \returns A 128-bit vector of [8 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a + (__v8hu)__b);
 }
 
@@ -2499,8 +2499,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a - (__v16qu)__b);
 }
 
@@ -2516,8 +2516,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a - (__v8hu)__b);
 }
 

>From 5b0b590f848b630fd1f44abd3958d1a723d126ef Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Tue, 9 Sep 2025 12:21:56 -0400
Subject: [PATCH 7/8] X86: add constexpr tests for add/sub intrinsics

---
 clang/test/CodeGen/X86/avx2-builtins.c     | 66 ++++++++++++++++++
 clang/test/CodeGen/X86/avx512bw-builtins.c | 60 ++++++++++++++++
 clang/test/CodeGen/X86/avx512f-builtins.c  | 80 ++++++++++++++++++++++
 clang/test/CodeGen/X86/sse2-builtins.c     | 28 ++++++++
 4 files changed, 234 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 724a5f693f9fe..ddbfd93b7f5f6 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -48,24 +48,57 @@ __m256i test_mm256_add_epi8(__m256i a, __m256i b) {
   return _mm256_add_epi8(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v32qi(
+    _mm256_add_epi8(
+      (__m256i)(__v32qi){
+        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 },
+      (__m256i)(__v32qi){
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
+    17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
+
 __m256i test_mm256_add_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_add_epi16
   // CHECK: add <16 x i16>
   return _mm256_add_epi16(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_add_epi16(
+      (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 },
+      (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1 }),
+    1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31));
+
 __m256i test_mm256_add_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_add_epi32
   // CHECK: add <8 x i32>
   return _mm256_add_epi32(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v8si(
+    _mm256_add_epi32(
+      (__m256i)(__v8si){1,2,3,4,5,6,7,8},
+      (__m256i)(__v8si){8,7,6,5,4,3,2,1}),
+    9,9,9,9,9,9,9,9));
+
 __m256i test_mm256_add_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_add_epi64
   // CHECK: add <4 x i64>
   return _mm256_add_epi64(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v4di(
+    _mm256_add_epi64(
+      (__m256i)(__v4di){10,20,30,40},
+      (__m256i)(__v4di){ 1, 3, 5, 7 }),
+    11,23,35,47));
+
 __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epi8
   // CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
@@ -1358,24 +1391,57 @@ __m256i test_mm256_sub_epi8(__m256i a, __m256i b) {
   return _mm256_sub_epi8(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v32qi(
+    _mm256_sub_epi8(
+      (__m256i)(__v32qi){
+        0,1,2,3,4,5,6,7, 8, 9,10,11,12,13,14,15,
+        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 },
+      (__m256i)(__v32qi){
+        1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1,1,1,1,1,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    -1,0,1,2,3,4,5,6, 7, 8, 9,10,11,12,13,14,
+    15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30));
+
 __m256i test_mm256_sub_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sub_epi16
   // CHECK: sub <16 x i16>
   return _mm256_sub_epi16(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_sub_epi16(
+      (__m256i)(__v16hi){ 0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30 },
+      (__m256i)(__v16hi){ 1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1 }),
+    -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29));
+
 __m256i test_mm256_sub_epi32(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sub_epi32
   // CHECK: sub <8 x i32>
   return _mm256_sub_epi32(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v8si(
+    _mm256_sub_epi32(
+      (__m256i)(__v8si){10,20,30,40,50,60,70,80},
+      (__m256i)(__v8si){ 1, 2, 3, 4, 5, 6, 7, 8}),
+    9,18,27,36,45,54,63,72));
+
 __m256i test_mm256_sub_epi64(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_sub_epi64
   // CHECK: sub <4 x i64>
   return _mm256_sub_epi64(a, b);
 }
 
+TEST_CONSTEXPR(
+  match_v4di(
+    _mm256_sub_epi64(
+      (__m256i)(__v4di){10,20,30,40},
+      (__m256i)(__v4di){ 1, 3, 5, 7 }),
+    9,17,25,33));
+
 __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epi8
   // CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 1f67a9e4d2e53..96a5a6438e004 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -752,6 +752,24 @@ __m512i test_mm512_add_epi8 (__m512i __A, __m512i __B) {
   return _mm512_add_epi8(__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v64qi(
+    _mm512_add_epi8(
+      (__m512i)(__v64qi){
+        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+        32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+        48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 },
+      (__m512i)(__v64qi){
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
+    17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,
+    33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,
+    49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64));
+
 __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mask_add_epi8
   //CHECK: add <64 x i8> %{{.*}}, %{{.*}}
@@ -772,6 +790,24 @@ __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) {
   return _mm512_sub_epi8(__A, __B);
 }
 
+TEST_CONSTEXPR(
+  match_v64qi(
+    _mm512_sub_epi8(
+      (__m512i)(__v64qi){
+        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+        16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+        32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+        48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 },
+      (__m512i)(__v64qi){
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }),
+    -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+    15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+    31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
+    47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62));
+
 __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mask_sub_epi8
   //CHECK: sub <64 x i8> %{{.*}}, %{{.*}}
@@ -792,6 +828,18 @@ __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) {
   return _mm512_add_epi16(__A, __B);
 }
 
+TEST_CONSTEXPR(
+  match_v32hi(
+    _mm512_add_epi16(
+      (__m512i)(__v32hi){
+        0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30,
+       32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 },
+      (__m512i)(__v32hi){
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    1, 3, 5, 7, 9,11,13,15, 17,19,21,23,25,27,29,31,
+    33,35,37,39,41,43,45,47, 49,51,53,55,57,59,61,63));
+
 __m512i test_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mask_add_epi16
   //CHECK: add <32 x i16> %{{.*}}, %{{.*}}
@@ -812,6 +860,18 @@ __m512i test_mm512_sub_epi16 (__m512i __A, __m512i __B) {
   return _mm512_sub_epi16(__A, __B);
 }
 
+TEST_CONSTEXPR(
+  match_v32hi(
+    _mm512_sub_epi16(
+      (__m512i)(__v32hi){
+        0, 2, 4, 6, 8,10,12,14, 16,18,20,22,24,26,28,30,
+       32,34,36,38,40,42,44,46, 48,50,52,54,56,58,60,62 },
+      (__m512i)(__v32hi){
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    -1, 1, 3, 5, 7, 9,11,13, 15,17,19,21,23,25,27,29,
+    31,33,35,37,39,41,43,45, 47,49,51,53,55,57,59,61));
+
 __m512i test_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mask_sub_epi16
   //CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 10d464c429a43..50c59a4edd9f5 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3006,6 +3006,14 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_sub_epi32(__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_maskz_sub_epi32(
+      K,
+      (__m512i)(__v16si){10,11,12,13,14,15,16,17, 100,200,300,400,500,600,700,800},
+      (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8,   9,  8,  7,  6,  5,  4,  3,  2}),
+    9,9,9,9,9,9,9,9, 0,0,0,0,0,0,0,0));
+
 __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_sub_epi32
@@ -3014,12 +3022,24 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
   return _mm512_mask_sub_epi32(__src,__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_mask_sub_epi32(SRC, K, A, B),
+    9,9,9,9,9,9,9,9, 42,42,42,42,42,42,42,42));
+
 __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_sub_epi32
   //CHECK: sub <16 x i32>
   return _mm512_sub_epi32(__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_sub_epi32(
+      (__m512i)(__v16si){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25},
+      (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16}),
+    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
+
 __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_sub_epi64
   //CHECK: sub <8 x i64> %{{.*}}, %{{.*}}
@@ -3027,6 +3047,14 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_sub_epi64(__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_maskz_sub_epi64(
+      K,
+      (__m512i)(__v8di){100,200,300,400,500,600,700,800},
+      (__m512i)(__v8di){  1,  2,  3,  4,  5,  6,  7,  8}),
+    99,198,297,396, 0,0,0,0));
+
 __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_sub_epi64
@@ -3035,12 +3063,24 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
   return _mm512_mask_sub_epi64(__src,__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_mask_sub_epi64(SRC, K, A, B),
+    99,198,297,396, -1,-1,-1,-1));
+
 __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_sub_epi64
   //CHECK: sub <8 x i64>
   return _mm512_sub_epi64(__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_sub_epi64(
+      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+      (__m512i)(__v8di){ 1, 3, 5, 7, 9,11,13,15}),
+    9,17,25,33,41,49,57,65));
+
 __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_add_epi32
   //CHECK: add <16 x i32> %{{.*}}, %{{.*}}
@@ -3048,6 +3088,14 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_add_epi32(__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_maskz_add_epi32(
+      K,
+      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 10,20,30,40,50,60,70,80 },
+      (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1,  9, 8, 7, 6, 5, 4, 3, 2 }),
+    1,2,3,4,5,6,7,8, 0,0,0,0,0,0,0,0));
+
 __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_add_epi32
@@ -3056,12 +3104,24 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
   return _mm512_mask_add_epi32(__src,__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_mask_add_epi32(SRC, K, A, B),
+    1,2,3,4,5,6,7,8, 100,100,100,100,100,100,100,100));
+
 __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_add_epi32
   //CHECK: add <16 x i32>
   return _mm512_add_epi32(__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v16si(
+    _mm512_add_epi32(
+      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+      (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16));
+
 __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_add_epi64
   //CHECK: add <8 x i64> %{{.*}}, %{{.*}}
@@ -3069,6 +3129,14 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_add_epi64(__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_maskz_add_epi64(
+      K,
+      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+      (__m512i)(__v8di){ 1, 2, 3, 4, 1, 2, 3, 4}),
+    11,22,33,44, 0,0,0,0));
+
 __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_add_epi64
@@ -3077,12 +3145,24 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
   return _mm512_mask_add_epi64(__src,__k,__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_mask_add_epi64(SRC, K, A, B),
+    11,22,33,44, 100,100,100,100));
+
 __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_add_epi64
   //CHECK: add <8 x i64>
   return _mm512_add_epi64(__A,__B);
 }
 
+TEST_CONSTEXPR(
+  match_v8di(
+    _mm512_add_epi64(
+      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
+      (__m512i)(__v8di){ 1, 1, 1, 1, 1, 1, 1, 1}),
+    11,21,31,41,51,61,71,81));
+
 __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mul_epi32
   //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index f5de5069c0046..ee3bfce119b2e 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -32,12 +32,26 @@ __m128i test_mm_add_epi8(__m128i A, __m128i B) {
   return _mm_add_epi8(A, B);
 }
 
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_add_epi8(
+      (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+      (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16));
+
 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_add_epi16
   // CHECK: add <8 x i16>
   return _mm_add_epi16(A, B);
 }
 
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_add_epi16(
+      (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 },
+      (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }),
+    1, 3, 5, 7, 9, 11, 13, 15));
+
 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_add_epi32
   // CHECK: add <4 x i32>
@@ -1713,12 +1727,26 @@ __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
   return _mm_sub_epi8(A, B);
 }
 
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_sub_epi8(
+      (__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+      (__m128i)(__v16qi){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
+    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14));
+
 __m128i test_mm_sub_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_sub_epi16
   // CHECK: sub <8 x i16>
   return _mm_sub_epi16(A, B);
 }
 
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_sub_epi16(
+      (__m128i)(__v8hi){ 0, 2, 4, 6, 8,10,12,14 },
+      (__m128i)(__v8hi){ 1, 1, 1, 1, 1, 1, 1, 1 }),
+    -1, 1, 3, 5, 7, 9, 11, 13));
+
 __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_sub_epi32
   // CHECK: sub <4 x i32>

>From 1d5c9696fecdf0abae50353336807e03d60e27d8 Mon Sep 17 00:00:00 2001
From: donneypr <donatoprabahar at gmail.com>
Date: Tue, 9 Sep 2025 13:47:25 -0400
Subject: [PATCH 8/8] Revert avx512f-builtins.c to upstream/main version

---
 clang/test/CodeGen/X86/avx512f-builtins.c | 101 +++++-----------------
 1 file changed, 21 insertions(+), 80 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 50c59a4edd9f5..f93216e546a63 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3006,14 +3006,6 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_sub_epi32(__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_maskz_sub_epi32(
-      K,
-      (__m512i)(__v16si){10,11,12,13,14,15,16,17, 100,200,300,400,500,600,700,800},
-      (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8,   9,  8,  7,  6,  5,  4,  3,  2}),
-    9,9,9,9,9,9,9,9, 0,0,0,0,0,0,0,0));
-
 __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_sub_epi32
@@ -3022,24 +3014,12 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
   return _mm512_mask_sub_epi32(__src,__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_mask_sub_epi32(SRC, K, A, B),
-    9,9,9,9,9,9,9,9, 42,42,42,42,42,42,42,42));
-
 __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_sub_epi32
   //CHECK: sub <16 x i32>
   return _mm512_sub_epi32(__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_sub_epi32(
-      (__m512i)(__v16si){10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25},
-      (__m512i)(__v16si){ 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16}),
-    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
-
 __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_sub_epi64
   //CHECK: sub <8 x i64> %{{.*}}, %{{.*}}
@@ -3047,14 +3027,6 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_sub_epi64(__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_maskz_sub_epi64(
-      K,
-      (__m512i)(__v8di){100,200,300,400,500,600,700,800},
-      (__m512i)(__v8di){  1,  2,  3,  4,  5,  6,  7,  8}),
-    99,198,297,396, 0,0,0,0));
-
 __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_sub_epi64
@@ -3063,24 +3035,12 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
   return _mm512_mask_sub_epi64(__src,__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_mask_sub_epi64(SRC, K, A, B),
-    99,198,297,396, -1,-1,-1,-1));
-
 __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_sub_epi64
   //CHECK: sub <8 x i64>
   return _mm512_sub_epi64(__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_sub_epi64(
-      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
-      (__m512i)(__v8di){ 1, 3, 5, 7, 9,11,13,15}),
-    9,17,25,33,41,49,57,65));
-
 __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_add_epi32
   //CHECK: add <16 x i32> %{{.*}}, %{{.*}}
@@ -3088,14 +3048,6 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_add_epi32(__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_maskz_add_epi32(
-      K,
-      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 10,20,30,40,50,60,70,80 },
-      (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1,  9, 8, 7, 6, 5, 4, 3, 2 }),
-    1,2,3,4,5,6,7,8, 0,0,0,0,0,0,0,0));
-
 __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_add_epi32
@@ -3104,24 +3056,12 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
   return _mm512_mask_add_epi32(__src,__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_mask_add_epi32(SRC, K, A, B),
-    1,2,3,4,5,6,7,8, 100,100,100,100,100,100,100,100));
-
 __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_add_epi32
   //CHECK: add <16 x i32>
   return _mm512_add_epi32(__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v16si(
-    _mm512_add_epi32(
-      (__m512i)(__v16si){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
-      (__m512i)(__v16si){ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }),
-    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16));
-
 __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_maskz_add_epi64
   //CHECK: add <8 x i64> %{{.*}}, %{{.*}}
@@ -3129,14 +3069,6 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
   return _mm512_maskz_add_epi64(__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_maskz_add_epi64(
-      K,
-      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
-      (__m512i)(__v8di){ 1, 2, 3, 4, 1, 2, 3, 4}),
-    11,22,33,44, 0,0,0,0));
-
 __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, 
                                    __m512i __src) {
   //CHECK-LABEL: test_mm512_mask_add_epi64
@@ -3145,24 +3077,12 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B,
   return _mm512_mask_add_epi64(__src,__k,__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_mask_add_epi64(SRC, K, A, B),
-    11,22,33,44, 100,100,100,100));
-
 __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_add_epi64
   //CHECK: add <8 x i64>
   return _mm512_add_epi64(__A,__B);
 }
 
-TEST_CONSTEXPR(
-  match_v8di(
-    _mm512_add_epi64(
-      (__m512i)(__v8di){10,20,30,40,50,60,70,80},
-      (__m512i)(__v8di){ 1, 1, 1, 1, 1, 1, 1, 1}),
-    11,21,31,41,51,61,71,81));
-
 __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
   //CHECK-LABEL: test_mm512_mul_epi32
   //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
@@ -4254,6 +4174,7 @@ __m512i test_mm512_rolv_epi32(__m512i __A, __m512i __B) {
   // CHECK: @llvm.fshl.v16i32
   return _mm512_rolv_epi32(__A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_rolv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -32769, 786432, -24577, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 60, 2147483640));
 
 __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_rolv_epi32
@@ -4261,6 +4182,7 @@ __m512i test_mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_rolv_epi32(__W, __U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rolv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 786432, 999, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 999, 2147483640));
 
 __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_rolv_epi32
@@ -4268,12 +4190,14 @@ __m512i test_mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_rolv_epi32(__U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rolv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 0, 786432, 0, -4194305, -10485761, 7168, 67108864, 2304, -301989889, -641, -353, -193, -1073741823, 0, 2147483640));
 
 __m512i test_mm512_rolv_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_rolv_epi64
   // CHECK: @llvm.fshl.v8i64
   return _mm512_rolv_epi64(__A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_rolv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, -129, 864691128455135232LL, -97, 5764607523034234880LL, -4611686018427387904LL, -25, -15));
 
 __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_rolv_epi64
@@ -4281,6 +4205,7 @@ __m512i test_mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_rolv_epi64(__W, __U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rolv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 999, 999, -97, 5764607523034234880LL, 999, 999, 999));
 
 __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_rolv_epi64
@@ -4288,6 +4213,7 @@ __m512i test_mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_rolv_epi64(__U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rolv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 256, 0, 0, -97, 5764607523034234880LL, 0, 0, 0));
 
 __m512i test_mm512_ror_epi32(__m512i __A) {
   // CHECK-LABEL: test_mm512_ror_epi32
@@ -4340,6 +4266,7 @@ __m512i test_mm512_rorv_epi32(__m512i __A, __m512i __B) {
   // CHECK: @llvm.fshr.v16i32
   return _mm512_rorv_epi32(__A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_rorv_epi32((__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, -131073, 49152, -1572865, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, -1073741821, -31));
 
 __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_rorv_epi32
@@ -4347,6 +4274,7 @@ __m512i test_mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m5
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_rorv_epi32(__W, __U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_rorv_epi32((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}, 0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 999, 49152, 999, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 999, -31));
 
 __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_rorv_epi32
@@ -4354,12 +4282,14 @@ __m512i test_mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_rorv_epi32(__U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_rorv_epi32(0xBFF5, (__m512i)(__v16si){ -1, -2, 3, -4, -5, -6, 7, 8, 9, -10, -11, -12, -13, 14, 15, -16}, (__m512i)(__v16si){ 16, 15, -14, 13, -12, -11, 10, -9, 8, -7, 6, 5, 4, -3, 2, -1}), -1, 0, 49152, 0, -16385, -10241, 29360128, 4096, 150994944, -1153, -671088641, -1476395009, 1073741823, 112, 0, -31));
 
 __m512i test_mm512_rorv_epi64(__m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_rorv_epi64
   // CHECK: @llvm.fshr.v8i64
   return _mm512_rorv_epi64(__A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_rorv_epi64((__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, -144115188075855873LL, 192, -1729382256910270465LL, 80, 48, 9223372036854775806LL, 9223372036854775804LL));
 
 __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_rorv_epi64
@@ -4367,6 +4297,7 @@ __m512i test_mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m51
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_rorv_epi64(__W, __U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_rorv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, 999, 999, -1729382256910270465LL, 80, 999, 999, 999));
 
 __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: test_mm512_maskz_rorv_epi64
@@ -4374,6 +4305,7 @@ __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_rorv_epi64(__U, __A, __B); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_rorv_epi64(0x19, (__m512i)(__v8di){ 1, -2, 3, -4, 5, 6, -7, -8}, (__m512i)(__v8di){ 8, 7, -6, 5, -4, -3, 2, 1}), 72057594037927936LL, 0, 0, -1729382256910270465LL, 80, 0, 0, 0));
 
 __m512i test_mm512_slli_epi32(__m512i __A) {
   // CHECK-LABEL: test_mm512_slli_epi32
@@ -6079,6 +6011,7 @@ __m512i test_mm512_sllv_epi64(__m512i __X, __m512i __Y) {
   // CHECK: @llvm.x86.avx512.psllv.q.512
   return _mm512_sllv_epi64(__X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_sllv_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}),  32, -68, 0, 0, 0, 1344, 0, 0));
 
 __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_mask_sllv_epi64
@@ -6086,6 +6019,7 @@ __m512i test_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_sllv_epi64(__W, __U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_sllv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 1344, 0, 0));
 
 __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_maskz_sllv_epi64
@@ -6093,6 +6027,7 @@ __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_sllv_epi64(__U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_sllv_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}),  0, 0, 0, 0, 0, 1344, 0, 0));
 
 __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) {
   // CHECK-LABEL: test_mm512_sra_epi32
@@ -6162,6 +6097,7 @@ __m512i test_mm512_srav_epi64(__m512i __X, __m512i __Y) {
   // CHECK: @llvm.x86.avx512.psrav.q.512
   return _mm512_srav_epi64(__X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_srav_epi64((__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}),  8, -5, 0, -1, 0, 0, 0, 0));
 
 __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_mask_srav_epi64
@@ -6169,6 +6105,7 @@ __m512i test_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_srav_epi64(__W, __U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_srav_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}), 999, 999, 0, 999, 999, 0, 0, 0));
 
 __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_maskz_srav_epi64
@@ -6176,6 +6113,7 @@ __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_srav_epi64(__U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srav_epi64(0xE4, (__m512i)(__v8di){ 16, -17, 18, -19, 20, 21, 22, 23}, (__m512i)(__v8di){ 1, 2, -3, -4, -5, 6, -7, -8}),  0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) {
   // CHECK-LABEL: test_mm512_srl_epi32
@@ -6245,6 +6183,7 @@ __m512i test_mm512_srlv_epi64(__m512i __X, __m512i __Y) {
   // CHECK: @llvm.x86.avx512.psrlv.q.512
   return _mm512_srlv_epi64(__X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_srlv_epi64((__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}),  8, 4, 2305843009213693949, 1, 0, 0, 0, 72057594037927935));
 
 __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_mask_srlv_epi64
@@ -6252,6 +6191,7 @@ __m512i test_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m51
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_srlv_epi64(__W, __U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_mask_srlv_epi64((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}, 0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}), 999, 999, 999, 1, 999, 0, 999, 999));
 
 __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_maskz_srlv_epi64
@@ -6259,6 +6199,7 @@ __m512i test_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) {
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_srlv_epi64(__U, __X, __Y); 
 }
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_srlv_epi64(0x28, (__m512i)(__v8di){ 16, 17, -18, 19, -20, -21, 22, -23}, (__m512i)(__v8di){ 1, 2, 3, 4, -5, -6, 7, 8}),  0, 0, 0, 1, 0, 0, 0, 0));
 
 __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: test_mm512_ternarylogic_epi32



More information about the cfe-commits mailing list