[clang] [Headers][X86] Allow AVX512 masked shuffles to be used in constexpr (PR #162301)

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 16 08:33:22 PDT 2025


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/162301

>From 0f0effd2cfd05ebb23c00550c487702672140c87 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 13:03:31 +0300
Subject: [PATCH 01/12] feat: [Headers][X86] Allow AVX512 masked shuffles to be
 used in constexpr

---
 clang/lib/Headers/avx512dqintrin.h         |  24 ++--
 clang/lib/Headers/avx512fintrin.h          |  48 ++++----
 clang/lib/Headers/avx512vldqintrin.h       |  20 ++--
 clang/lib/Headers/avx512vlintrin.h         | 132 ++++++++++-----------
 clang/test/CodeGen/X86/avx512dq-builtins.c |  17 +++
 clang/test/CodeGen/X86/avx512f-builtins.c  |  76 ++++++++++++
 clang/test/CodeGen/X86/avx512vl-builtins.c |  80 +++++++++++++
 7 files changed, 285 insertions(+), 112 deletions(-)

diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fb65bf933b8ad..a5c1b78ad4305 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1083,7 +1083,7 @@ _mm512_broadcast_f32x2(__m128 __A) {
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1091,7 +1091,7 @@ _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
                                              (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1106,7 +1106,7 @@ _mm512_broadcast_f32x8(__m256 __A) {
                                          0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1114,7 +1114,7 @@ _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
                                            (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -1128,7 +1128,7 @@ _mm512_broadcast_f64x2(__m128d __A) {
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -1136,7 +1136,7 @@ _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
                                             (__v8df)__O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -1151,7 +1151,7 @@ _mm512_broadcast_i32x2(__m128i __A) {
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1159,7 +1159,7 @@ _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
                                              (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1174,7 +1174,7 @@ _mm512_broadcast_i32x8(__m256i __A) {
                                           0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1182,7 +1182,7 @@ _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
                                            (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1196,7 +1196,7 @@ _mm512_broadcast_i64x2(__m128i __A) {
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1204,7 +1204,7 @@ _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
                                             (__v8di)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e58425cdd71..14d79bdba3844 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -225,7 +225,7 @@ _mm512_broadcastd_epi32(__m128i __A) {
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__M,
@@ -233,7 +233,7 @@ _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
                                              (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__M,
@@ -247,7 +247,7 @@ _mm512_broadcastq_epi64(__m128i __A) {
                                           0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -256,7 +256,7 @@ _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
 
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -321,7 +321,7 @@ _mm512_set1_epi32(int __s)
     __s, __s, __s, __s, __s, __s, __s, __s };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__M,
@@ -335,7 +335,7 @@ _mm512_set1_epi64(long long __d)
   return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -6552,7 +6552,7 @@ _mm512_broadcast_f32x4(__m128 __A) {
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -6560,7 +6560,7 @@ _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
                                            (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -6597,7 +6597,7 @@ _mm512_broadcast_i32x4(__m128i __A) {
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -6605,7 +6605,7 @@ _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
                                            (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -6635,7 +6635,7 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
@@ -6643,7 +6643,7 @@ _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
                                               (__v8df) __O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
@@ -6651,7 +6651,7 @@ _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
                                               (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__M,
@@ -6659,7 +6659,7 @@ _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
                                              (__v16sf) __O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__M,
@@ -8381,7 +8381,7 @@ _mm512_movehdup_ps (__m512 __A)
                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8389,7 +8389,7 @@ _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8404,7 +8404,7 @@ _mm512_moveldup_ps (__m512 __A)
                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8412,7 +8412,7 @@ _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8420,26 +8420,26 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                      _mm_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
@@ -8884,7 +8884,7 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 }
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
 {
   return (__m512i) __builtin_ia32_selectd_512(__M,
@@ -8892,7 +8892,7 @@ _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
                                               (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+ static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
 {
   return (__m512i) __builtin_ia32_selectq_512(__M,
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 68bd52e43981a..bc0ab63c20351 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -968,7 +968,7 @@ _mm256_broadcast_f32x2(__m128 __A) {
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -976,7 +976,7 @@ _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
                                              (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -990,7 +990,7 @@ _mm256_broadcast_f64x2(__m128d __A) {
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
@@ -998,7 +998,7 @@ _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
                                             (__v4df)__O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
@@ -1012,7 +1012,7 @@ _mm_broadcast_i32x2(__m128i __A) {
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -1020,7 +1020,7 @@ _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
                                              (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -1034,7 +1034,7 @@ _mm256_broadcast_i32x2(__m128i __A) {
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -1042,7 +1042,7 @@ _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
                                              (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -1056,7 +1056,7 @@ _mm256_broadcast_i64x2(__m128i __A) {
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -1064,7 +1064,7 @@ _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
                                             (__v4di)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 965741f0ff944..1cef356eddb0a 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5101,7 +5101,7 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
 {
    return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -5109,7 +5109,7 @@ _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
                                               (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
 {
    return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -5117,7 +5117,7 @@ _mm_maskz_set1_epi32( __mmask8 __M, int __A)
                                               (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
 {
    return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -5125,7 +5125,7 @@ _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
                                               (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
 {
    return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -5134,7 +5134,7 @@ _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
 {
   return (__m128i) __builtin_ia32_selectq_128(__M,
@@ -5142,7 +5142,7 @@ _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
                                               (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
   return (__m128i) __builtin_ia32_selectq_128(__M,
@@ -5150,7 +5150,7 @@ _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
                                               (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
 {
   return (__m256i) __builtin_ia32_selectq_256(__M,
@@ -5158,7 +5158,7 @@ _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
                                               (__v4di) __O) ;
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
    return (__m256i) __builtin_ia32_selectq_256(__M,
@@ -5611,7 +5611,7 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
 }
 
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5619,7 +5619,7 @@ _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5627,7 +5627,7 @@ _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5635,7 +5635,7 @@ _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5643,7 +5643,7 @@ _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5651,7 +5651,7 @@ _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5659,7 +5659,7 @@ _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5667,7 +5667,7 @@ _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5675,7 +5675,7 @@ _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
                                            (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5683,7 +5683,7 @@ _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5691,7 +5691,7 @@ _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5699,7 +5699,7 @@ _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5707,7 +5707,7 @@ _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5715,7 +5715,7 @@ _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5723,7 +5723,7 @@ _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5731,7 +5731,7 @@ _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -6055,7 +6055,7 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6063,7 +6063,7 @@ _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6071,7 +6071,7 @@ _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6079,7 +6079,7 @@ _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6087,7 +6087,7 @@ _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6095,7 +6095,7 @@ _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6103,7 +6103,7 @@ _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6111,7 +6111,7 @@ _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6119,7 +6119,7 @@ _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
                                         (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6127,7 +6127,7 @@ _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6135,7 +6135,7 @@ _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6143,7 +6143,7 @@ _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6151,7 +6151,7 @@ _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6159,7 +6159,7 @@ _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6167,7 +6167,7 @@ _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6175,7 +6175,7 @@ _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6594,7 +6594,7 @@ _mm256_broadcast_f32x4(__m128 __A) {
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -6602,7 +6602,7 @@ _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
                                             (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -6616,7 +6616,7 @@ _mm256_broadcast_i32x4(__m128i __A) {
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -6624,7 +6624,7 @@ _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
                                             (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -6632,7 +6632,7 @@ _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
@@ -6640,7 +6640,7 @@ _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
                                               (__v4df) __O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
@@ -6648,7 +6648,7 @@ _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
                                               (__v4df) _mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128(__M,
@@ -6656,7 +6656,7 @@ _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
                                              (__v4sf) __O);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128(__M,
@@ -6664,7 +6664,7 @@ _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
                                              (__v4sf) _mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256(__M,
@@ -6672,7 +6672,7 @@ _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
                                              (__v8sf) __O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256(__M,
@@ -6680,7 +6680,7 @@ _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
                                              (__v8sf) _mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -6688,7 +6688,7 @@ _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
                                              (__v4si) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -6696,7 +6696,7 @@ _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
                                              (__v4si) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -6704,7 +6704,7 @@ _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
                                              (__v8si) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -6712,7 +6712,7 @@ _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
                                              (__v8si) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -6720,7 +6720,7 @@ _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
                                              (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -6728,7 +6728,7 @@ _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
                                              (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -6736,7 +6736,7 @@ _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
                                              (__v4di) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -8003,7 +8003,7 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                  (__v4di)_mm256_setzero_si256()))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8011,7 +8011,7 @@ _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8019,7 +8019,7 @@ _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8027,7 +8027,7 @@ _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8035,7 +8035,7 @@ _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8043,7 +8043,7 @@ _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8051,7 +8051,7 @@ _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8059,7 +8059,7 @@ _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 4112561216af8..0676b5b091588 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -12,6 +12,23 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
+// constexpr coverage for i32x8/f32x8 broadcasts (DQ)
+{
+  __m256i a = _mm256_set1_epi32(8);
+  TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, a),
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8));
+}
+{
+  __m256 a = _mm256_set1_ps(5.0f);
+  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, a),
+    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5));
+}
+{
+  __m256 a = _mm256_set1_ps(7.0f);
+  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, a),
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7));
+}
+
 __mmask8 test_knot_mask8(__mmask8 a) {
   // CHECK-LABEL: test_knot_mask8
   // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 7756f0da18c03..0caa23df95982 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11,6 +11,82 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
+// constexpr coverage for masked set1/broadcast/unpack/move_ss/move_sd (F)
+TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF, 13),
+  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13));
+TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21),
+  21,21,21,21,21,21,21,21));
+
+{
+  __m128 a = (__m128)(__v4sf){1,2,3,4};
+  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, a),
+    1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+}
+{
+  __m128 a = (__m128)(__v4sf){1,2,3,4};
+  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, a),
+    1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+}
+{
+  __m128d a = (__m128d)(__v2df){1,2};
+  TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, a),
+    1,2,1,2,1,2,1,2));
+}
+{
+  __m128d a = (__m128d)(__v2df){1,2};
+  TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, a),
+    1,2,1,2,1,2,1,2));
+}
+
+// additionally add i32x4/i32x8 z-forms
+{
+  __m128i a = (__m128i)(__v4si){0,1,2,3};
+  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, a),
+    0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
+}
+{
+  __m256i a = _mm256_set1_epi32(9);
+  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, a),
+    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
+}
+
+// unpack and moves (512)
+{
+  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+  __m512 b = _mm512_set1_ps(0);
+  TEST_CONSTEXPR(match_m512(_mm512_mask_unpackhi_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
+    15,0,13,0,11,0,9,0,7,0,5,0,3,0,1,0));
+}
+{
+  __m512 a = _mm512_set1_ps(1);
+  __m512 b = _mm512_set1_ps(2);
+  TEST_CONSTEXPR(match_m512(_mm512_mask_unpacklo_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
+    1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2));
+}
+
+// move_ss/move_sd in constexpr masked form
+{
+  __m128 a = (__m128)(__v4sf){10,2,3,4};
+  __m128 b = (__m128)(__v4sf){20,6,7,8};
+  TEST_CONSTEXPR(match_m128(_mm_mask_move_ss(_mm_setzero_ps(), 0x1, a, b), 20,0,0,0));
+}
+{
+  __m128d a = (__m128d)(__v2df){10,2};
+  __m128d b = (__m128d)(__v2df){20,6};
+  TEST_CONSTEXPR(match_m128d(_mm_mask_move_sd(_mm_setzero_pd(), 0x1, a, b), 20,0));
+}
+// z-forms for movehdup/moveldup
+{
+  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+  TEST_CONSTEXPR(match_m512(_mm512_maskz_movehdup_ps(0xFFFF, a),
+    15,15,13,13,11,11,9,9,7,7,5,5,3,3,1,1));
+}
+{
+  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
+  TEST_CONSTEXPR(match_m512(_mm512_maskz_moveldup_ps(0xFFFF, a),
+    16,16,14,14,12,12,10,10,8,8,6,6,4,4,2,2));
+}
+
 __m512d test_mm512_sqrt_pd(__m512d a)
 {
   // CHECK-LABEL: test_mm512_sqrt_pd
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 51385d57d2944..1d1d3520d7659 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7,6 +7,86 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
+// constexpr coverage for masked set1/broadcast/unpack/movehdup/moveldup (VL)
+TEST_CONSTEXPR(match_v4si(_mm_mask_set1_epi32(_mm_setzero_si128(), 0xF, 7), 7, 7, 7, 7));
+TEST_CONSTEXPR(match_v2di(_mm_mask_set1_epi64(_mm_setzero_si128(), 0x3, 9), 9, 9));
+
+TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xFF, 5), 5, 5, 5, 5, 5, 5, 5, 5));
+TEST_CONSTEXPR(match_v4di(_mm256_mask_set1_epi64(_mm256_setzero_si256(), 0xF, 11), 11, 11, 11, 11));
+
+{
+  __m128i a = (__m128i)(__v4si){0,1,2,3};
+  TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, a), 0,1,0,1));
+}
+{
+  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
+  TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, a), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+}
+{
+  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
+  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, a), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+}
+{
+  __m128d a = (__m128d)(__v2df){1.0,2.0};
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, a), 1.0,2.0,1.0,2.0));
+}
+{
+  __m128d a = (__m128d)(__v2df){1.0,2.0};
+  TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, a), 1.0,2.0,1.0,2.0));
+}
+
+// i64x2 maskz
+{
+  __m128i a = (__m128i)(__v2di){1,2};
+  TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, a), 1,2,1,2));
+}
+
+// i32x4/f32x4 maskz (VL)
+{
+  __m128i a = (__m128i)(__v4si){0,1,2,3};
+  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, a), 0,1,2,3,0,1,2,3));
+}
+{
+  __m128 a = (__m128)(__v4sf){0,1,2,3};
+  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, a), 0,1,2,3,0,1,2,3));
+}
+
+// i32x2 maskz (128/256)
+{
+  __m128i a = (__m128i)(__v4si){0,1,2,3};
+  TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, a), 0,1,0,1));
+}
+{
+  __m128i a = (__m128i)(__v4si){0,1,2,3};
+  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, a), 0,1,0,1,0,1,0,1));
+}
+
+// unpackhi/lo with full mask behaves like the underlying unpack
+{
+  __m128d a = (__m128d)(__v2df){1.0,2.0};
+  __m128d b = (__m128d)(__v2df){3.0,4.0};
+  TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, a, b), 2.0,4.0));
+  TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, a, b), 1.0,3.0));
+}
+{
+  __m256d a = (__m256d)(__v4df){1.0,2.0,3.0,4.0};
+  __m256d b = (__m256d)(__v4df){5.0,6.0,7.0,8.0};
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, a, b), 2.0,6.0,4.0,8.0));
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, a, b), 1.0,5.0,3.0,7.0));
+}
+
+// movehdup/moveldup with full mask equals the underlying *_move*dup
+{
+  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
+  TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, a), 2.f,2.f,4.f,4.f));
+  TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, a), 1.f,1.f,3.f,3.f));
+}
+{
+  __m256 a = (__m256)(__v8sf){1,2,3,4,5,6,7,8};
+  TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, a), 2,2,4,4,6,6,8,8));
+  TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, a), 1,1,3,3,5,5,7,7));
+}
+
 __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: test_mm_cmpeq_epu32_mask
   // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}

>From facdc7e0663aaeebaf107b16108e6d82857c1552 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 13:06:55 +0300
Subject: [PATCH 02/12] chor: apply formatting

---
 clang/lib/Headers/avx512dqintrin.h   |  36 ++---
 clang/lib/Headers/avx512fintrin.h    |  77 +++------
 clang/lib/Headers/avx512vldqintrin.h |  30 ++--
 clang/lib/Headers/avx512vlintrin.h   | 229 ++++++++++-----------------
 4 files changed, 128 insertions(+), 244 deletions(-)

diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index a5c1b78ad4305..3681ccab0b179 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1084,16 +1084,14 @@ _mm512_broadcast_f32x2(__m128 __A) {
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
-{
+_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                              (__v16sf)_mm512_broadcast_f32x2(__A),
                                              (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
-{
+_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                              (__v16sf)_mm512_broadcast_f32x2(__A),
                                              (__v16sf)_mm512_setzero_ps());
@@ -1107,16 +1105,14 @@ _mm512_broadcast_f32x8(__m256 __A) {
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
-{
+_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x8(__A),
                                            (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
-{
+_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x8(__A),
                                            (__v16sf)_mm512_setzero_ps());
@@ -1129,16 +1125,14 @@ _mm512_broadcast_f64x2(__m128d __A) {
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
-{
+_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                             (__v8df)_mm512_broadcast_f64x2(__A),
                                             (__v8df)__O);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
-{
+_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                             (__v8df)_mm512_broadcast_f64x2(__A),
                                             (__v8df)_mm512_setzero_pd());
@@ -1152,16 +1146,14 @@ _mm512_broadcast_i32x2(__m128i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_broadcast_i32x2(__A),
                                              (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
-{
+_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                              (__v16si)_mm512_broadcast_i32x2(__A),
                                              (__v16si)_mm512_setzero_si512());
@@ -1175,16 +1167,14 @@ _mm512_broadcast_i32x8(__m256i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
-{
+_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x8(__A),
                                            (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
-{
+_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x8(__A),
                                            (__v16si)_mm512_setzero_si512());
@@ -1197,16 +1187,14 @@ _mm512_broadcast_i64x2(__m128i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
-{
+_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
-{
+_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 14d79bdba3844..8401647ef9da5 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -226,16 +226,14 @@ _mm512_broadcastd_epi32(__m128i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_broadcastd_epi32(__A),
                                              (__v16si) __O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
-{
+_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_broadcastd_epi32(__A),
                                              (__v16si) _mm512_setzero_si512());
@@ -248,17 +246,14 @@ _mm512_broadcastq_epi64(__m128i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
-{
+_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_broadcastq_epi64(__A),
                                              (__v8di) __O);
-
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_broadcastq_epi64(__A),
                                              (__v8di) _mm512_setzero_si512());
@@ -322,8 +317,7 @@ _mm512_set1_epi32(int __s)
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
-{
+_mm512_maskz_set1_epi32(__mmask16 __M, int __A) {
   return (__m512i)__builtin_ia32_selectd_512(__M,
                                              (__v16si)_mm512_set1_epi32(__A),
                                              (__v16si)_mm512_setzero_si512());
@@ -336,8 +330,7 @@ _mm512_set1_epi64(long long __d)
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
-{
+_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return (__m512i)__builtin_ia32_selectq_512(__M,
                                              (__v8di)_mm512_set1_epi64(__A),
                                              (__v8di)_mm512_setzero_si512());
@@ -6553,16 +6546,14 @@ _mm512_broadcast_f32x4(__m128 __A) {
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
-{
+_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x4(__A),
                                            (__v16sf)__O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
-{
+_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                            (__v16sf)_mm512_broadcast_f32x4(__A),
                                            (__v16sf)_mm512_setzero_ps());
@@ -6598,16 +6589,14 @@ _mm512_broadcast_i32x4(__m128i __A) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
-{
+_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x4(__A),
                                            (__v16si)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
-{
+_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_broadcast_i32x4(__A),
                                            (__v16si)_mm512_setzero_si512());
@@ -6636,32 +6625,28 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
-{
+_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
                                               (__v8df) _mm512_broadcastsd_pd(__A),
                                               (__v8df) __O);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
                                               (__v8df) _mm512_broadcastsd_pd(__A),
                                               (__v8df) _mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
-{
+_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512(__M,
                                              (__v16sf) _mm512_broadcastss_ps(__A),
                                              (__v16sf) __O);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
-{
+_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) {
   return (__m512)__builtin_ia32_selectps_512(__M,
                                              (__v16sf) _mm512_broadcastss_ps(__A),
                                              (__v16sf) _mm512_setzero_ps());
@@ -8382,16 +8367,14 @@ _mm512_movehdup_ps (__m512 __A)
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_movehdup_ps(__A),
                                              (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
-{
+_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_movehdup_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
@@ -8405,43 +8388,37 @@ _mm512_moveldup_ps (__m512 __A)
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_moveldup_ps(__A),
                                              (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
-{
+_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_moveldup_ps(__A),
                                              (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
   return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                      _mm_setzero_ps());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
   return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                      _mm_setzero_pd());
 }
@@ -8884,17 +8861,15 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 }
 #endif
 
- static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
   return (__m512i) __builtin_ia32_selectd_512(__M,
                                               (__v16si) _mm512_set1_epi32(__A),
                                               (__v16si) __O);
 }
 
- static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
   return (__m512i) __builtin_ia32_selectq_512(__M,
                                               (__v8di) _mm512_set1_epi64(__A),
                                               (__v8di) __O);
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index bc0ab63c20351..ee7974e924afe 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -969,16 +969,14 @@ _mm256_broadcast_f32x2(__m128 __A) {
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
-{
+_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                              (__v8sf)_mm256_broadcast_f32x2(__A),
                                              (__v8sf)__O);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
-{
+_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                              (__v8sf)_mm256_broadcast_f32x2(__A),
                                              (__v8sf)_mm256_setzero_ps());
@@ -991,16 +989,14 @@ _mm256_broadcast_f64x2(__m128d __A) {
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
-{
+_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                             (__v4df)_mm256_broadcast_f64x2(__A),
                                             (__v4df)__O);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
-{
+_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                             (__v4df)_mm256_broadcast_f64x2(__A),
                                             (__v4df)_mm256_setzero_pd());
@@ -1013,16 +1009,14 @@ _mm_broadcast_i32x2(__m128i __A) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_broadcast_i32x2(__A),
                                              (__v4si)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_broadcast_i32x2(__A),
                                              (__v4si)_mm_setzero_si128());
@@ -1035,16 +1029,14 @@ _mm256_broadcast_i32x2(__m128i __A) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_broadcast_i32x2(__A),
                                              (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_broadcast_i32x2(__A),
                                              (__v8si)_mm256_setzero_si256());
@@ -1057,16 +1049,14 @@ _mm256_broadcast_i64x2(__m128i __A) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
-{
+_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
-{
+_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 1cef356eddb0a..676b5a07e2521 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5102,68 +5102,54 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
-{
-   return (__m128i)__builtin_ia32_selectd_128(__M,
-                                              (__v4si) _mm_set1_epi32(__A),
-                                              (__v4si)__O);
+_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
+  return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+                                             (__v4si)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_set1_epi32( __mmask8 __M, int __A)
-{
-   return (__m128i)__builtin_ia32_selectd_128(__M,
-                                              (__v4si) _mm_set1_epi32(__A),
-                                              (__v4si)_mm_setzero_si128());
+_mm_maskz_set1_epi32(__mmask8 __M, int __A) {
+  return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
-{
-   return (__m256i)__builtin_ia32_selectd_256(__M,
-                                              (__v8si) _mm256_set1_epi32(__A),
-                                              (__v8si)__O);
+_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
-{
-   return (__m256i)__builtin_ia32_selectd_256(__M,
-                                              (__v8si) _mm256_set1_epi32(__A),
-                                              (__v8si)_mm256_setzero_si256());
+_mm256_maskz_set1_epi32(__mmask8 __M, int __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256());
 }
 
-
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
-{
+_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
   return (__m128i) __builtin_ia32_selectq_128(__M,
                                               (__v2di) _mm_set1_epi64x(__A),
                                               (__v2di) __O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
+_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return (__m128i) __builtin_ia32_selectq_128(__M,
                                               (__v2di) _mm_set1_epi64x(__A),
                                               (__v2di) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
-{
+_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
   return (__m256i) __builtin_ia32_selectq_256(__M,
                                               (__v4di) _mm256_set1_epi64x(__A),
                                               (__v4di) __O) ;
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
-   return (__m256i) __builtin_ia32_selectq_256(__M,
-                                               (__v4di) _mm256_set1_epi64x(__A),
-                                               (__v4di) _mm256_setzero_si256());
+_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256());
 }
 
 #define _mm_fixupimm_pd(A, B, C, imm) \
@@ -5610,130 +5596,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
            (__mmask8) __U);
 }
 
-
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpackhi_pd(__A, __B),
                                               (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpackhi_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
                                            (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
                                            (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
                                            (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
                                            (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpacklo_pd(__A, __B),
                                               (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_unpacklo_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
                                            (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
                                            (__v4df)_mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
                                            (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
                                            (__v8sf)_mm256_setzero_ps());
@@ -6056,128 +6025,116 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
                                            (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
                                            (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
                                         (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
                                         (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
                                            (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
                                            (__v2di)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
                                         (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
                                         (__v4di)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
                                            (__v4si)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
                                            (__v4si)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
                                         (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
                                         (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
                                            (__v2di)__W);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
                                            (__v2di)_mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+                           __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
                                         (__v4di)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
                                         (__v4di)_mm256_setzero_si256());
@@ -6595,16 +6552,14 @@ _mm256_broadcast_f32x4(__m128 __A) {
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
-{
+_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x4(__A),
                                             (__v8sf)__O);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
-{
+_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x4(__A),
                                             (__v8sf)_mm256_setzero_ps());
@@ -6617,128 +6572,112 @@ _mm256_broadcast_i32x4(__m128i __A) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
-{
+_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x4(__A),
                                             (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
-{
+_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x4(__A),
                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
-{
+_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
                                               (__v4df) _mm256_broadcastsd_pd(__A),
                                               (__v4df) __O);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
                                               (__v4df) _mm256_broadcastsd_pd(__A),
                                               (__v4df) _mm256_setzero_pd());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
-{
+_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128(__M,
                                              (__v4sf) _mm_broadcastss_ps(__A),
                                              (__v4sf) __O);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128(__M,
                                              (__v4sf) _mm_broadcastss_ps(__A),
                                              (__v4sf) _mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
-{
+_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256(__M,
                                              (__v8sf) _mm256_broadcastss_ps(__A),
                                              (__v8sf) __O);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
   return (__m256)__builtin_ia32_selectps_256(__M,
                                              (__v8sf) _mm256_broadcastss_ps(__A),
                                              (__v8sf) _mm256_setzero_ps());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128(__M,
                                              (__v4si) _mm_broadcastd_epi32(__A),
                                              (__v4si) __O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128(__M,
                                              (__v4si) _mm_broadcastd_epi32(__A),
                                              (__v4si) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256(__M,
                                              (__v8si) _mm256_broadcastd_epi32(__A),
                                              (__v8si) __O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectd_256(__M,
                                              (__v8si) _mm256_broadcastd_epi32(__A),
                                              (__v8si) _mm256_setzero_si256());
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128(__M,
                                              (__v2di) _mm_broadcastq_epi64(__A),
                                              (__v2di) __O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128(__M,
                                              (__v2di) _mm_broadcastq_epi64(__A),
                                              (__v2di) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256(__M,
                                              (__v4di) _mm256_broadcastq_epi64(__A),
                                              (__v4di) __O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
   return (__m256i)__builtin_ia32_selectq_256(__M,
                                              (__v4di) _mm256_broadcastq_epi64(__A),
                                              (__v4di) _mm256_setzero_si256());
@@ -8004,64 +7943,56 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
                                  (__v4di)_mm256_setzero_si256()))
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_movehdup_ps(__A),
                                              (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
-{
+_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_movehdup_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_movehdup_ps(__A),
                                              (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
-{
+_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_movehdup_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_moveldup_ps(__A),
                                              (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
-{
+_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_moveldup_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_moveldup_ps(__A),
                                              (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
-{
+_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_moveldup_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());

>From d9488e509bf71273f673a5ecd5b05024a86801c2 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 17:50:05 +0300
Subject: [PATCH 03/12] refactor: update tests

---
 clang/test/CodeGen/X86/avx512dq-builtins.c |  23 ++---
 clang/test/CodeGen/X86/avx512vl-builtins.c | 114 ++++++++++-----------
 2 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 0676b5b091588..c361a1426547b 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -13,21 +13,20 @@
 #include "builtin_test_helpers.h"
 
 // constexpr coverage for i32x8/f32x8 broadcasts (DQ)
-{
-  __m256i a = _mm256_set1_epi32(8);
-  TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, a),
+
+  
+  TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, _mm256_set1_epi32(8)),
     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8));
-}
-{
-  __m256 a = _mm256_set1_ps(5.0f);
-  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, a),
+
+
+  
+  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, _mm256_set1_ps(5.0f)),
     5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5));
-}
-{
-  __m256 a = _mm256_set1_ps(7.0f);
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, a),
+
+
+  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, _mm256_set1_ps(7.0f)),
     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7));
-}
+
 
 __mmask8 test_knot_mask8(__mmask8 a) {
   // CHECK-LABEL: test_knot_mask8
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 1d1d3520d7659..1c4ed17144b01 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -14,78 +14,68 @@ TEST_CONSTEXPR(match_v2di(_mm_mask_set1_epi64(_mm_setzero_si128(), 0x3, 9), 9, 9
 TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xFF, 5), 5, 5, 5, 5, 5, 5, 5, 5));
 TEST_CONSTEXPR(match_v4di(_mm256_mask_set1_epi64(_mm256_setzero_si256(), 0xF, 11), 11, 11, 11, 11));
 
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, a), 0,1,0,1));
-}
-{
-  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
-  TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, a), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-}
-{
-  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
-  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, a), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-}
-{
-  __m128d a = (__m128d)(__v2df){1.0,2.0};
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, a), 1.0,2.0,1.0,2.0));
-}
-{
-  __m128d a = (__m128d)(__v2df){1.0,2.0};
-  TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, a), 1.0,2.0,1.0,2.0));
-}
+
+  
+  TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
+
+
+  
+  TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+
+
+  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+
+
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+
+
+  
+  TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+
 
 // i64x2 maskz
-{
-  __m128i a = (__m128i)(__v2di){1,2};
-  TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, a), 1,2,1,2));
-}
+
+  TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 1,2,1,2));
+
 
 // i32x4/f32x4 maskz (VL)
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, a), 0,1,2,3,0,1,2,3));
-}
-{
-  __m128 a = (__m128)(__v4sf){0,1,2,3};
-  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, a), 0,1,2,3,0,1,2,3));
-}
+
+  
+  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3));
+
+
+  
+  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, (__m128)(__v4sf){0,1,2,3}), 0,1,2,3,0,1,2,3));
+
 
 // i32x2 maskz (128/256)
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, a), 0,1,0,1));
-}
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, a), 0,1,0,1,0,1,0,1));
-}
+
+  TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
+
+
+  
+  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
+
 
 // unpackhi/lo with full mask behaves like the underlying unpack
-{
-  __m128d a = (__m128d)(__v2df){1.0,2.0};
-  __m128d b = (__m128d)(__v2df){3.0,4.0};
-  TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, a, b), 2.0,4.0));
-  TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, a, b), 1.0,3.0));
-}
-{
-  __m256d a = (__m256d)(__v4df){1.0,2.0,3.0,4.0};
-  __m256d b = (__m256d)(__v4df){5.0,6.0,7.0,8.0};
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, a, b), 2.0,6.0,4.0,8.0));
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, a, b), 1.0,5.0,3.0,7.0));
-}
+
+  TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 2.0,4.0));
+  TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 1.0,3.0));
+
+
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 2.0,6.0,4.0,8.0));
+  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 1.0,5.0,3.0,7.0));
+
 
 // movehdup/moveldup with full mask equals the underlying *_move*dup
-{
-  __m128 a = (__m128)(__v4sf){1.f,2.f,3.f,4.f};
-  TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, a), 2.f,2.f,4.f,4.f));
-  TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, a), 1.f,1.f,3.f,3.f));
-}
-{
-  __m256 a = (__m256)(__v8sf){1,2,3,4,5,6,7,8};
-  TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, a), 2,2,4,4,6,6,8,8));
-  TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, a), 1,1,3,3,5,5,7,7));
-}
+
+  TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 2.f,2.f,4.f,4.f));
+  TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,1.f,3.f,3.f));
+
+
+  TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 2,2,4,4,6,6,8,8));
+  TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 1,1,3,3,5,5,7,7));
+
 
 __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: test_mm_cmpeq_epu32_mask

>From c9472b61b62b543678e46a78e53a0f413d5a2989 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 18:13:46 +0300
Subject: [PATCH 04/12] refactor: update test

---
 clang/test/CodeGen/X86/avx512f-builtins.c | 81 +++++------------------
 1 file changed, 15 insertions(+), 66 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 0caa23df95982..207133160e2db 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -17,75 +17,24 @@ TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF
 TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21),
   21,21,21,21,21,21,21,21));
 
-{
-  __m128 a = (__m128)(__v4sf){1,2,3,4};
-  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, a),
-    1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-}
-{
-  __m128 a = (__m128)(__v4sf){1,2,3,4};
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, a),
-    1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-}
-{
-  __m128d a = (__m128d)(__v2df){1,2};
-  TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, a),
-    1,2,1,2,1,2,1,2));
-}
-{
-  __m128d a = (__m128d)(__v2df){1,2};
-  TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, a),
-    1,2,1,2,1,2,1,2));
-}
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, (__m128)(__v4sf){1,2,3,4}),
+  1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
 
-// additionally add i32x4/i32x8 z-forms
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, a),
-    0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
-}
-{
-  __m256i a = _mm256_set1_epi32(9);
-  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, a),
-    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
-}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}),
+  1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
 
-// unpack and moves (512)
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  __m512 b = _mm512_set1_ps(0);
-  TEST_CONSTEXPR(match_m512(_mm512_mask_unpackhi_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
-    15,0,13,0,11,0,9,0,7,0,5,0,3,0,1,0));
-}
-{
-  __m512 a = _mm512_set1_ps(1);
-  __m512 b = _mm512_set1_ps(2);
-  TEST_CONSTEXPR(match_m512(_mm512_mask_unpacklo_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
-    1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2));
-}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}),
+  1,2,1,2,1,2,1,2));
 
-// move_ss/move_sd in constexpr masked form
-{
-  __m128 a = (__m128)(__v4sf){10,2,3,4};
-  __m128 b = (__m128)(__v4sf){20,6,7,8};
-  TEST_CONSTEXPR(match_m128(_mm_mask_move_ss(_mm_setzero_ps(), 0x1, a, b), 20,0,0,0));
-}
-{
-  __m128d a = (__m128d)(__v2df){10,2};
-  __m128d b = (__m128d)(__v2df){20,6};
-  TEST_CONSTEXPR(match_m128d(_mm_mask_move_sd(_mm_setzero_pd(), 0x1, a, b), 20,0));
-}
-// z-forms for movehdup/moveldup
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_movehdup_ps(0xFFFF, a),
-    15,15,13,13,11,11,9,9,7,7,5,5,3,3,1,1));
-}
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_moveldup_ps(0xFFFF, a),
-    16,16,14,14,12,12,10,10,8,8,6,6,4,4,2,2));
-}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}),
+  1,2,1,2,1,2,1,2));
+
+// additionally add i32x4/i32x8 z-forms
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, (__m128i)(__v4si){0,1,2,3}),
+  0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
+
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)),
+  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
 
 __m512d test_mm512_sqrt_pd(__m512d a)
 {

>From 1fcea1b13422d7dc3b12faffc8771a446aefd09c Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 21:52:39 +0300
Subject: [PATCH 05/12] refactor: PR Feedback

---
 clang/test/CodeGen/X86/avx512dq-builtins.c | 19 ++------
 clang/test/CodeGen/X86/avx512f-builtins.c  | 28 +++++------
 clang/test/CodeGen/X86/avx512vl-builtins.c | 54 ++++++++--------------
 3 files changed, 35 insertions(+), 66 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index c361a1426547b..d1e980601c462 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -12,22 +12,6 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
-// constexpr coverage for i32x8/f32x8 broadcasts (DQ)
-
-  
-  TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, _mm256_set1_epi32(8)),
-    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8));
-
-
-  
-  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, _mm256_set1_ps(5.0f)),
-    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5));
-
-
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, _mm256_set1_ps(7.0f)),
-    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7));
-
-
 __mmask8 test_knot_mask8(__mmask8 a) {
   // CHECK-LABEL: test_knot_mask8
   // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
@@ -1321,6 +1305,7 @@ __m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* _
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A)); 
 }
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, _mm256_set1_ps(5.0f)), 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5));
 
 __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f32x8
@@ -1328,6 +1313,7 @@ __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A)); 
 }
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, _mm256_set1_ps(7.0f)), 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7));
 
 __m512d test_mm512_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_f64x2
@@ -1384,6 +1370,7 @@ __m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i cons
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A)); 
 }
+TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, _mm256_set1_epi32(8)), 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8));
 
 __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_i32x8
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 207133160e2db..d06808631ab4e 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11,28 +11,14 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
-// constexpr coverage for masked set1/broadcast/unpack/move_ss/move_sd (F)
-TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF, 13),
-  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13));
-TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21),
-  21,21,21,21,21,21,21,21));
-
-TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, (__m128)(__v4sf){1,2,3,4}),
-  1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-
-TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}),
-  1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-
+// Constexpr coverage for DQ broadcast features (f64x2/i32x8) that don't have test functions in this file.
+// The corresponding test_mm512_*_broadcast_* functions are in avx512dq-builtins.c.
 TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}),
   1,2,1,2,1,2,1,2));
 
 TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}),
   1,2,1,2,1,2,1,2));
 
-// additionally add i32x4/i32x8 z-forms
-TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, (__m128i)(__v4si){0,1,2,3}),
-  0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
-
 TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)),
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
 
@@ -6861,6 +6847,8 @@ __m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* _
   return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); 
 }
 
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, (__m128)(__v4sf){1,2,3,4}), 1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+
 __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f32x4
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -6868,6 +6856,8 @@ __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) {
   return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); 
 }
 
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}), 1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+
 __m512d test_mm512_broadcast_f64x4(double const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_f64x4
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -6910,6 +6900,8 @@ __m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) {
   return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
+
 __m512i test_mm512_broadcast_i64x4(__m256i const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_i64x4
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -10928,6 +10920,8 @@ __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
   return _mm512_mask_set1_epi32 ( __O, __M, __A);
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF, 13), 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13));
+
 __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
 {     
   // CHECK-LABEL: test_mm512_maskz_set1_epi32
@@ -11170,6 +11164,8 @@ __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
   return _mm512_mask_set1_epi64 (__O, __M, __A);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21), 21,21,21,21,21,21,21,21));
+
 __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
   // CHECK-LABEL: test_mm512_maskz_set1_epi64
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 1c4ed17144b01..f3b8a4c246623 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7,45 +7,19 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
-// constexpr coverage for masked set1/broadcast/unpack/movehdup/moveldup (VL)
-TEST_CONSTEXPR(match_v4si(_mm_mask_set1_epi32(_mm_setzero_si128(), 0xF, 7), 7, 7, 7, 7));
-TEST_CONSTEXPR(match_v2di(_mm_mask_set1_epi64(_mm_setzero_si128(), 0x3, 9), 9, 9));
-
-TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xFF, 5), 5, 5, 5, 5, 5, 5, 5, 5));
-TEST_CONSTEXPR(match_v4di(_mm256_mask_set1_epi64(_mm256_setzero_si256(), 0xF, 11), 11, 11, 11, 11));
-
-
-  
-  TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
-
+// Constexpr coverage for VLDQ broadcast features (i32x2/f32x2/f64x2/i64x2) that don't have test functions in this file.
+// The corresponding test_mm*_*_broadcast_* functions are in avx512vldq-builtins.c.
+TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
 
-  
-  TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-
-
-  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-
-
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
-
-
-  
-  TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
 
+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
 
-// i64x2 maskz
+TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
 
-  TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 1,2,1,2));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
 
-
-// i32x4/f32x4 maskz (VL)
-
-  
-  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3));
-
-
-  
-  TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, (__m128)(__v4sf){0,1,2,3}), 0,1,2,3,0,1,2,3));
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 1,2,1,2));
 
 
 // i32x2 maskz (128/256)
@@ -7271,6 +7245,8 @@ __m128i test_mm_mask_set1_epi32(__m128i __O, __mmask8 __M) {
   return _mm_mask_set1_epi32(__O, __M, 5); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_mask_set1_epi32(_mm_setzero_si128(), 0xF, 7), 7, 7, 7, 7));
+
 __m128i test_mm_maskz_set1_epi32(__mmask8 __M) {
   // CHECK-LABEL: test_mm_maskz_set1_epi32
   // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0
@@ -7296,6 +7272,8 @@ __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) {
   return _mm256_mask_set1_epi32(__O, __M, 5); 
 }
 
+TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xFF, 5), 5, 5, 5, 5, 5, 5, 5, 5));
+
 __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) {
   // CHECK-LABEL: test_mm256_maskz_set1_epi32
   // CHECK:  insertelement <8 x i32> poison, i32 %{{.*}}, i32 0
@@ -7319,6 +7297,8 @@ __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
   return _mm_mask_set1_epi64(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_mask_set1_epi64(_mm_setzero_si128(), 0x3, 9), 9, 9));
+
 __m128i test_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
   // CHECK-LABEL: test_mm_maskz_set1_epi64
   // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
@@ -7339,6 +7319,8 @@ __m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
   return _mm256_mask_set1_epi64(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_mask_set1_epi64(_mm256_setzero_si256(), 0xF, 11), 11, 11, 11, 11));
+
 __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) {
   // CHECK-LABEL: test_mm256_maskz_set1_epi64
   // CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i32 0
@@ -9067,6 +9049,8 @@ __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
   return _mm256_maskz_broadcast_f32x4(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, (__m128)(__v4sf){0,1,2,3}), 0,1,2,3,0,1,2,3));
+
 __m256i test_mm256_broadcast_i32x4(__m128i const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_i32x4
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -9088,6 +9072,8 @@ __m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) {
   return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3));
+
 __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
   // CHECK-LABEL: test_mm256_mask_broadcastsd_pd
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer

>From 39619bfe0ac953b3b86772fa7666baf4df6fefe0 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Tue, 7 Oct 2025 21:56:16 +0300
Subject: [PATCH 06/12] chore: fix formatting

---
 clang/lib/Headers/avx512fintrin.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 8401647ef9da5..07de036ef5143 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -247,9 +247,8 @@ _mm512_broadcastq_epi64(__m128i __A) {
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
-  return (__m512i)__builtin_ia32_selectq_512(__M,
-                                             (__v8di) _mm512_broadcastq_epi64(__A),
-                                             (__v8di) __O);
+  return (__m512i)__builtin_ia32_selectq_512(
+      __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR

>From 539b86e612eda41809f11c3b00f90de66b38949c Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 9 Oct 2025 13:09:04 +0300
Subject: [PATCH 07/12] chore: Move tests from avx512vl-builtins

---
 clang/test/CodeGen/X86/avx512vl-builtins.c   | 60 ++++++--------------
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 16 ++++++
 2 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index f3b8a4c246623..71bd5f4949611 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7,50 +7,6 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
-// Constexpr coverage for VLDQ broadcast features (i32x2/f32x2/f64x2/i64x2) that don't have test functions in this file.
-// The corresponding test_mm*_*_broadcast_* functions are in avx512vldq-builtins.c.
-TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
-
-TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-
-TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
-
-TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
-
-TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
-
-TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 1,2,1,2));
-
-
-// i32x2 maskz (128/256)
-
-  TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
-
-
-  
-  TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
-
-
-// unpackhi/lo with full mask behaves like the underlying unpack
-
-  TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 2.0,4.0));
-  TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 1.0,3.0));
-
-
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 2.0,6.0,4.0,8.0));
-  TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 1.0,5.0,3.0,7.0));
-
-
-// movehdup/moveldup with full mask equals the underlying *_move*dup
-
-  TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 2.f,2.f,4.f,4.f));
-  TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,1.f,3.f,3.f));
-
-
-  TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 2,2,4,4,6,6,8,8));
-  TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 1,1,3,3,5,5,7,7));
-
-
 __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: test_mm_cmpeq_epu32_mask
   // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
@@ -7675,6 +7631,8 @@ __m128d test_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d
   return _mm_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 2.0,4.0));
+
 __m128d test_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: test_mm_maskz_unpackhi_pd
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
@@ -7689,6 +7647,8 @@ __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
   return _mm256_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 2.0,6.0,4.0,8.0));
+
 __m256d test_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: test_mm256_maskz_unpackhi_pd
   // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -7731,6 +7691,8 @@ __m128d test_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d
   return _mm_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 1.0,3.0));
+
 __m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: test_mm_maskz_unpacklo_pd
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
@@ -7745,6 +7707,8 @@ __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
   return _mm256_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 1.0,5.0,3.0,7.0));
+
 __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: test_mm256_maskz_unpacklo_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -10380,6 +10344,8 @@ __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return _mm_mask_movehdup_ps(__W, __U, __A);
 }
 
+TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 2.f,2.f,4.f,4.f));
+
 __m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: test_mm_maskz_movehdup_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -10394,6 +10360,8 @@ __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return _mm256_mask_movehdup_ps(__W, __U, __A);
 }
 
+TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 2,2,4,4,6,6,8,8));
+
 __m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: test_mm256_maskz_movehdup_ps
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@@ -10408,6 +10376,8 @@ __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   return _mm_mask_moveldup_ps(__W, __U, __A);
 }
 
+TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,1.f,3.f,3.f));
+
 __m128 test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: test_mm_maskz_moveldup_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@@ -10422,6 +10392,8 @@ __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return _mm256_mask_moveldup_ps(__W, __U, __A);
 }
 
+TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 1,1,3,3,5,5,7,7));
+
 __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: test_mm256_maskz_moveldup_ps
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 938845799acf5..a8708c5fe4349 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -987,6 +987,8 @@ __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
   return _mm256_mask_broadcast_f32x2(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+
 __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   // CHECK-LABEL: test_mm256_maskz_broadcast_f32x2
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -994,6 +996,8 @@ __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   return _mm256_maskz_broadcast_f32x2(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+
 __m256d test_mm256_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_f64x2
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -1008,6 +1012,8 @@ __m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, double const*
   return _mm256_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
+TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+
 __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: test_mm256_maskz_broadcast_f64x2
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -1015,6 +1021,8 @@ __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   return _mm256_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+
 __m128i test_mm_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: test_mm_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -1029,6 +1037,8 @@ __m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
   return _mm_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
+
 __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   // CHECK-LABEL: test_mm_maskz_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -1036,6 +1046,8 @@ __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return _mm_maskz_broadcast_i32x2(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1));
+
 __m256i test_mm256_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: test_mm256_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -1057,6 +1069,8 @@ __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return _mm256_maskz_broadcast_i32x2(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
+
 __m256i test_mm256_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_i64x2
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -1078,6 +1092,8 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
   return _mm256_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); 
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 1,2,1,2));
+
 __m128d test_mm256_extractf64x2_pd(__m256d __A) {
   // CHECK-LABEL: test_mm256_extractf64x2_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>

>From ccc654a658febe990610d96b0f65e02c8a9511c9 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 9 Oct 2025 13:13:13 +0300
Subject: [PATCH 08/12] chore: Move test func from avx512f-builtins to
 corresponding test file

---
 clang/test/CodeGen/X86/avx512dq-builtins.c |  6 ++++++
 clang/test/CodeGen/X86/avx512f-builtins.c  | 11 -----------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index d1e980601c462..d25921f852b95 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1329,6 +1329,8 @@ __m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const*
   return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
+TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}), 1,2,1,2,1,2,1,2));
+
 __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f64x2
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -1336,6 +1338,8 @@ __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}), 1,2,1,2,1,2,1,2));
+
 __m512i test_mm512_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: test_mm512_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -1379,6 +1383,8 @@ __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
   return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A)); 
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)), 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
+
 __m512i test_mm512_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_i64x2
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index d06808631ab4e..011b677f0f182 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -11,17 +11,6 @@
 #include <immintrin.h>
 #include "builtin_test_helpers.h"
 
-// Constexpr coverage for DQ broadcast features (f64x2/i32x8) that don't have test functions in this file.
-// The corresponding test_mm512_*_broadcast_* functions are in avx512dq-builtins.c.
-TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}),
-  1,2,1,2,1,2,1,2));
-
-TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}),
-  1,2,1,2,1,2,1,2));
-
-TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)),
-  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
-
 __m512d test_mm512_sqrt_pd(__m512d a)
 {
   // CHECK-LABEL: test_mm512_sqrt_pd

>From f0f7f3f38edfa118ad45871173e6c0b42c3309bc Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Wed, 15 Oct 2025 12:08:06 +0300
Subject: [PATCH 09/12] chore: PR Feedback

---
 clang/test/CodeGen/X86/avx512dq-builtins.c | 2 +-
 clang/test/CodeGen/X86/avx512vl-builtins.c | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index d25921f852b95..9d7966d6dc09a 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1305,7 +1305,7 @@ __m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* _
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A)); 
 }
-TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xFFFF, _mm256_set1_ps(5.0f)), 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5));
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xAAAA, (__m256)(__v8sf){5.0f,5.0f,5.0f,5.0f,5.0f,5.0f,5.0f,5.0f}), 0,5,0,5,0,5,0,5,0,5,0,5,0,5,0,5));
 
 __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f32x8
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 71bd5f4949611..5c7a00d9375ee 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7700,6 +7700,8 @@ __m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return _mm_maskz_unpacklo_pd(__U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m128d(_mm_maskz_unpacklo_pd(0x2, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 0.0,3.0));
+
 __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: test_mm256_mask_unpacklo_pd
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -7716,6 +7718,8 @@ __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return _mm256_maskz_unpacklo_pd(__U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_unpacklo_pd(0x0A, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0.0,5.0,0.0,7.0));
+
 __m128 test_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: test_mm_mask_unpacklo_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>

>From df5211ee5ff9689e44ba96868cc6ed1554a74a60 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 16 Oct 2025 11:40:52 +0300
Subject: [PATCH 10/12] chore: add missing tests

---
 clang/test/CodeGen/X86/avx512dq-builtins.c   |  4 ++++
 clang/test/CodeGen/X86/avx512f-builtins.c    |  4 ++++
 clang/test/CodeGen/X86/avx512vl-builtins.c   | 12 ++++++++++++
 clang/test/CodeGen/X86/avx512vldq-builtins.c |  2 ++
 4 files changed, 22 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 9d7966d6dc09a..b0e1588dd3dc5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1354,6 +1354,8 @@ __m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A)
   return _mm512_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x2(_mm512_setzero_si512(), 0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1));
+
 __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -1361,6 +1363,8 @@ __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
   return _mm512_maskz_broadcast_i32x2(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x2(0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1));
+
 __m512i test_mm512_broadcast_i32x8(__m256i const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_i32x8
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 011b677f0f182..a4319aef20a9b 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10934,6 +10934,8 @@ __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
     return _mm512_maskz_set1_epi32(__M, __A);
 }
 
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_set1_epi32(0xAAAA, 19), 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0));
+
 
 __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59,
     char e58, char e57, char e56, char e55, char e54, char e53, char e52,
@@ -11170,6 +11172,8 @@ __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
   return _mm512_maskz_set1_epi64 (__M, __A);
 }
 
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_set1_epi64(0xAA, 23), 0, 23, 0, 23, 0, 23, 0, 23));
+
 
 __m512i test_mm512_set_epi64 (long long __A, long long __B, long long __C,
                               long long __D, long long __E, long long __F,
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5c7a00d9375ee..13d23fbe88c83 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7214,6 +7214,8 @@ __m128i test_mm_maskz_set1_epi32(__mmask8 __M) {
   return _mm_maskz_set1_epi32(__M, 5); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_maskz_set1_epi32(0xA, 11), 11, 0, 11, 0));
+
 __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) {
   // CHECK-LABEL: test_mm256_mask_set1_epi32
   // CHECK:  insertelement <8 x i32> poison, i32 %{{.*}}, i32 0
@@ -7244,6 +7246,8 @@ __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) {
   return _mm256_maskz_set1_epi32(__M, 5); 
 }
 
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_set1_epi32(0xAA, 13), 13, 0, 13, 0, 13, 0, 13, 0));
+
 __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
   // CHECK-LABEL: test_mm_mask_set1_epi64
   // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0
@@ -7264,6 +7268,8 @@ __m128i test_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return _mm_maskz_set1_epi64(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_v2di(_mm_maskz_set1_epi64(0x2, 15), 0, 15));
+
 __m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
   // CHECK-LABEL: test_mm256_mask_set1_epi64
   // CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i32 0
@@ -7288,6 +7294,8 @@ __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) {
   return _mm256_maskz_set1_epi64(__M, __A); 
 }
 
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_set1_epi64(0xA, 17), 0, 17, 0, 17));
+
 __m128d test_mm_fixupimm_pd(__m128d __A, __m128d __B, __m128i __C) {
   // CHECK-LABEL: test_mm_fixupimm_pd
   // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.128
@@ -8059,6 +8067,8 @@ __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m1
   return _mm_mask_unpackhi_epi32(__W, __U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_mask_unpackhi_epi32(_mm_setzero_si128(), 0xA, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,2,0,3));
+
 __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_unpackhi_epi32
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -8066,6 +8076,8 @@ __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return _mm_maskz_unpackhi_epi32(__U, __A, __B); 
 }
 
+TEST_CONSTEXPR(match_v4si(_mm_maskz_unpackhi_epi32(0x5, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,2,0,7));
+
 __m256i test_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_unpackhi_epi32
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index a8708c5fe4349..6cefa73a9aba0 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -1062,6 +1062,8 @@ __m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A)
   return _mm256_mask_broadcast_i32x2(__O, __M, __A); 
 }
 
+TEST_CONSTEXPR(match_v8si(_mm256_mask_broadcast_i32x2(_mm256_setzero_si256(), 0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
+
 __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   // CHECK-LABEL: test_mm256_maskz_broadcast_i32x2
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>

>From 89ad0c344bcf70a5b6891f9e11764ff97c7c0d43 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 16 Oct 2025 12:08:14 +0300
Subject: [PATCH 11/12] refactor: update tests

---
 clang/test/CodeGen/X86/avx512f-builtins.c  | 2 +-
 clang/test/CodeGen/X86/avx512vl-builtins.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index a4319aef20a9b..a70ff048f1a32 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10934,7 +10934,7 @@ __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
     return _mm512_maskz_set1_epi32(__M, __A);
 }
 
-TEST_CONSTEXPR(match_v16si(_mm512_maskz_set1_epi32(0xAAAA, 19), 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_set1_epi32(0xAAAA, 19), 0,19,0,19,0,19,0,19,0,19,0,19,0,19,0,19));
 
 
 __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59,
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 13d23fbe88c83..36b8d399d3647 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7214,7 +7214,7 @@ __m128i test_mm_maskz_set1_epi32(__mmask8 __M) {
   return _mm_maskz_set1_epi32(__M, 5); 
 }
 
-TEST_CONSTEXPR(match_v4si(_mm_maskz_set1_epi32(0xA, 11), 11, 0, 11, 0));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_set1_epi32(0xA, 11), 0, 11, 0, 11));
 
 __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) {
   // CHECK-LABEL: test_mm256_mask_set1_epi32
@@ -7246,7 +7246,7 @@ __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) {
   return _mm256_maskz_set1_epi32(__M, 5); 
 }
 
-TEST_CONSTEXPR(match_v8si(_mm256_maskz_set1_epi32(0xAA, 13), 13, 0, 13, 0, 13, 0, 13, 0));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_set1_epi32(0xAA, 13), 0, 13, 0, 13, 0, 13, 0, 13));
 
 __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
   // CHECK-LABEL: test_mm_mask_set1_epi64
@@ -8067,7 +8067,7 @@ __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m1
   return _mm_mask_unpackhi_epi32(__W, __U, __A, __B); 
 }
 
-TEST_CONSTEXPR(match_v4si(_mm_mask_unpackhi_epi32(_mm_setzero_si128(), 0xA, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,2,0,3));
+TEST_CONSTEXPR(match_v4si(_mm_mask_unpackhi_epi32(_mm_setzero_si128(), 0xA, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,6,0,7));
 
 __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_maskz_unpackhi_epi32
@@ -8076,7 +8076,7 @@ __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
   return _mm_maskz_unpackhi_epi32(__U, __A, __B); 
 }
 
-TEST_CONSTEXPR(match_v4si(_mm_maskz_unpackhi_epi32(0x5, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,2,0,7));
+TEST_CONSTEXPR(match_v4si(_mm_maskz_unpackhi_epi32(0x5, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 2,0,3,0));
 
 __m256i test_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_unpackhi_epi32

>From cfea576780511b404ca30e34636644bf79cb0948 Mon Sep 17 00:00:00 2001
From: ahmed <ahmednour.mohamed2012 at gmail.com>
Date: Thu, 16 Oct 2025 12:14:35 +0300
Subject: [PATCH 12/12] feat: Use partial masks

---
 clang/test/CodeGen/X86/avx512dq-builtins.c   | 10 +++++-----
 clang/test/CodeGen/X86/avx512f-builtins.c    | 10 +++++-----
 clang/test/CodeGen/X86/avx512vl-builtins.c   | 14 +++++++-------
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 10 +++++-----
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index b0e1588dd3dc5..1b099594c88fa 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1313,7 +1313,7 @@ __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A)); 
 }
-TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xFFFF, _mm256_set1_ps(7.0f)), 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xAAAA, _mm256_set1_ps(7.0f)), 0,7,0,7,0,7,0,7,0,7,0,7,0,7,0,7));
 
 __m512d test_mm512_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_f64x2
@@ -1329,7 +1329,7 @@ __m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const*
   return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
-TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}), 1,2,1,2,1,2,1,2));
+TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xAA, (__m128d)(__v2df){1,2}), 0,2,0,2,0,2,0,2));
 
 __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f64x2
@@ -1338,7 +1338,7 @@ __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
-TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}), 1,2,1,2,1,2,1,2));
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xAA, (__m128d)(__v2df){1,2}), 0,2,0,2,0,2,0,2));
 
 __m512i test_mm512_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: test_mm512_broadcast_i32x2
@@ -1378,7 +1378,7 @@ __m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i cons
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A)); 
 }
-TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xFFFF, _mm256_set1_epi32(8)), 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xAAAA, _mm256_set1_epi32(8)), 0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8));
 
 __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_i32x8
@@ -1387,7 +1387,7 @@ __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
   return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A)); 
 }
 
-TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)), 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xAAAA, _mm256_set1_epi32(9)), 0,9,0,9,0,9,0,9,0,9,0,9,0,9,0,9));
 
 __m512i test_mm512_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_i64x2
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index a70ff048f1a32..3deaf8efc9632 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -6836,7 +6836,7 @@ __m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* _
   return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); 
 }
 
-TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, (__m128)(__v4sf){1,2,3,4}), 1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xAAAA, (__m128)(__v4sf){1,2,3,4}), 0,2,0,4,0,2,0,4,0,2,0,4,0,2,0,4));
 
 __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) {
   // CHECK-LABEL: test_mm512_maskz_broadcast_f32x4
@@ -6845,7 +6845,7 @@ __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) {
   return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); 
 }
 
-TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}), 1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xAAAA, (__m128)(__v4sf){1,2,3,4}), 0,2,0,4,0,2,0,4,0,2,0,4,0,2,0,4));
 
 __m512d test_mm512_broadcast_f64x4(double const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_f64x4
@@ -6889,7 +6889,7 @@ __m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) {
   return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
-TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,3,0,1,0,3,0,1,0,3,0,1,0,3));
 
 __m512i test_mm512_broadcast_i64x4(__m256i const* __A) {
   // CHECK-LABEL: test_mm512_broadcast_i64x4
@@ -10909,7 +10909,7 @@ __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
   return _mm512_mask_set1_epi32 ( __O, __M, __A);
 }
 
-TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF, 13), 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13));
+TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xAAAA, 13), 0,13,0,13,0,13,0,13,0,13,0,13,0,13,0,13));
 
 __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
 {     
@@ -11155,7 +11155,7 @@ __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
   return _mm512_mask_set1_epi64 (__O, __M, __A);
 }
 
-TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21), 21,21,21,21,21,21,21,21));
+TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xAA, 21), 0,21,0,21,0,21,0,21));
 
 __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 36b8d399d3647..9b6bfea918191 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7230,7 +7230,7 @@ __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) {
   return _mm256_mask_set1_epi32(__O, __M, 5); 
 }
 
-TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xFF, 5), 5, 5, 5, 5, 5, 5, 5, 5));
+TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xAA, 5), 0, 5, 0, 5, 0, 5, 0, 5));
 
 __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) {
   // CHECK-LABEL: test_mm256_maskz_set1_epi32
@@ -7655,7 +7655,7 @@ __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
   return _mm256_mask_unpackhi_pd(__W, __U, __A, __B); 
 }
 
-TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 2.0,6.0,4.0,8.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xAA, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0,6.0,0,8.0));
 
 __m256d test_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: test_mm256_maskz_unpackhi_pd
@@ -7717,7 +7717,7 @@ __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
   return _mm256_mask_unpacklo_pd(__W, __U, __A, __B); 
 }
 
-TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xFF, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 1.0,5.0,3.0,7.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xAA, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0,5.0,0,7.0));
 
 __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: test_mm256_maskz_unpacklo_pd
@@ -9029,7 +9029,7 @@ __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
   return _mm256_maskz_broadcast_f32x4(__M, __A); 
 }
 
-TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xFF, (__m128)(__v4sf){0,1,2,3}), 0,1,2,3,0,1,2,3));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xAA, (__m128)(__v4sf){0,1,2,3}), 0,1,0,3,0,1,0,3));
 
 __m256i test_mm256_broadcast_i32x4(__m128i const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_i32x4
@@ -9052,7 +9052,7 @@ __m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) {
   return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); 
 }
 
-TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,2,3,0,1,2,3));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,3,0,1,0,3));
 
 __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
   // CHECK-LABEL: test_mm256_mask_broadcastsd_pd
@@ -10376,7 +10376,7 @@ __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return _mm256_mask_movehdup_ps(__W, __U, __A);
 }
 
-TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 2,2,4,4,6,6,8,8));
+TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xAA, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 0,2,0,4,0,6,0,8));
 
 __m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: test_mm256_maskz_movehdup_ps
@@ -10408,7 +10408,7 @@ __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   return _mm256_mask_moveldup_ps(__W, __U, __A);
 }
 
-TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xFF, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 1,1,3,3,5,5,7,7));
+TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xAA, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 0,1,0,3,0,5,0,7));
 
 __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: test_mm256_maskz_moveldup_ps
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 6cefa73a9aba0..4773b60986169 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -987,7 +987,7 @@ __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
   return _mm256_mask_broadcast_f32x2(__O, __M, __A); 
 }
 
-TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xAA, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 0,2.f,0,2.f,0,2.f,0,2.f));
 
 __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   // CHECK-LABEL: test_mm256_maskz_broadcast_f32x2
@@ -996,7 +996,7 @@ __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
   return _mm256_maskz_broadcast_f32x2(__M, __A); 
 }
 
-TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xFF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,2.f,1.f,2.f,1.f,2.f,1.f,2.f));
+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xAA, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 0,2.f,0,2.f,0,2.f,0,2.f));
 
 __m256d test_mm256_broadcast_f64x2(double const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_f64x2
@@ -1012,7 +1012,7 @@ __m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, double const*
   return _mm256_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); 
 }
 
-TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xA, (__m128d)(__v2df){1.0,2.0}), 0,2.0,0,2.0));
 
 __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   // CHECK-LABEL: test_mm256_maskz_broadcast_f64x2
@@ -1021,7 +1021,7 @@ __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
   return _mm256_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); 
 }
 
-TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1.0,2.0}), 1.0,2.0,1.0,2.0));
+TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xA, (__m128d)(__v2df){1.0,2.0}), 0,2.0,0,2.0));
 
 __m128i test_mm_broadcast_i32x2(__m128i __A) {
   // CHECK-LABEL: test_mm_broadcast_i32x2
@@ -1071,7 +1071,7 @@ __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
   return _mm256_maskz_broadcast_i32x2(__M, __A); 
 }
 
-TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xFF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1));
 
 __m256i test_mm256_broadcast_i64x2(__m128i const* __A) {
   // CHECK-LABEL: test_mm256_broadcast_i64x2



More information about the cfe-commits mailing list