[clang] [X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (PR #169980)
Hamza Hassanain via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 2 09:29:55 PST 2025
https://github.com/HamzaHassanain updated https://github.com/llvm/llvm-project/pull/169980
>From 29e2794651c50ccf60a28c2e08639913a68cd71c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 17:05:45 +0200
Subject: [PATCH 01/24] add tests that should pass:
clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
---
.../constexpr-x86-intrinsics-pd2ps.cpp | 120 ++++++++++++++++++
1 file changed, 120 insertions(+)
create mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
new file mode 100644
index 0000000000000..a082b23bfae03
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+// HACK: Prevent immintrin.h from pulling in standard library headers
+// that don't exist in this test environment.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+namespace ExactFinite {
+constexpr __m128d d2 = { -1.0, +2.0 };
+constexpr __m128 r128 = _mm_cvtpd_ps(d2);
+static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
+static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
+
+constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
+static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
+static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
+
+constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
+static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
+
+constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
+static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
+static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
+
+constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
+static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
+static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
+
+constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
+static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
+
+constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
+static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
+static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
+
+constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
+static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
+static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
+
+constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
+static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
+static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
+
+constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
+static_assert(r512lo_mask[0] == -1.0f, "");
+static_assert(r512lo_mask[1] == +2.0f, "");
+static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
+
+constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b_ss = { -1.0, 42.0 };
+constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
+static_assert(r_ss[0] == -1.0f, "");
+static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+
+constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
+static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
+constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
+static_assert(r_ss_mask_off[0] == 9.0f, "");
+constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
+static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+}
+
+namespace InexactOrSpecialReject {
+constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
+static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
+static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
+static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dsub = { 1e-310, 0.0 };
+constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
+static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
+static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}
+
+namespace MaskedSpecialCasesAllowed {
+constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
+static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+
+constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
+static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+
+constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
+constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
+static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+
+constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
+static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+
+constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
+constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
+static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+
+constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
+static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+
+constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
+static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}
>From 30c0dc75714191e31625bb074e6e62d54aeece7f Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 22:20:48 +0200
Subject: [PATCH 02/24] added __DEFAULT_FN_ATTRS_CONSTEXPR To Headers
---
clang/lib/Headers/avx512fintrin.h | 16 ++++++++--------
clang/lib/Headers/avx512vlintrin.h | 8 ++++----
clang/lib/Headers/avxintrin.h | 4 ++--
clang/lib/Headers/emmintrin.h | 4 ++--
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..b9f1d1eecc09f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_undefined_ps(void)
{
return (__m512)__builtin_ia32_undef512();
@@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps (__m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_pslo (__m512d __A)
{
return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A)
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
return (__m512) __builtin_shufflevector (
@@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..82a06edd28ba2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf) __W,
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf)
@@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 44ef88db5cbce..f3f444083edbf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) {
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_cvtpd_ps(__m256d __a)
{
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
@@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_undefined_ps(void)
{
return (__m256)__builtin_ia32_undef256();
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index dbe5ca0379cf5..1701effedc5ce 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d __a) {
return __builtin_ia32_cvtpd2ps((__v2df)__a);
}
@@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
/// converted value from the second parameter. The upper 96 bits are copied
/// from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a,
__m128d __b) {
return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
}
>From 9f1020ecf3a706df9537b38464b61748aa0278f0 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 22:24:54 +0200
Subject: [PATCH 03/24] added Constexpr to necessary builtins
---
clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..283a0a3e6ae0c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
}
-def undef256 : X86Builtin<"_Vector<4, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()" > {
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
}
def undef512 : X86Builtin<"_Vector<8, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
}
// FLAGS
@@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
@@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl",
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
>From d28d6d8c7cc6e816f772a78dd0d177f0248d3178 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:08:42 +0200
Subject: [PATCH 04/24] added FULL tests for pd2ps constexpr
---
.../constexpr-x86-intrinsics-pd2ps.cpp | 559 ++++++++++++++----
1 file changed, 459 insertions(+), 100 deletions(-)
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
index a082b23bfae03..4a1e9a9c5ae2c 100644
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -1,120 +1,479 @@
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
-// HACK: Prevent immintrin.h from pulling in standard library headers
-// that don't exist in this test environment.
-#define __MM_MALLOC_H
-
+#define __MM_MALLOC_H
#include <immintrin.h>
-namespace ExactFinite {
-constexpr __m128d d2 = { -1.0, +2.0 };
-constexpr __m128 r128 = _mm_cvtpd_ps(d2);
-static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
-static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
-
-constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
-static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
-static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
-
-constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
-static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
-
-constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
-static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
-static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
-
-constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
-static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
-static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
-
-constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
-static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
-
-constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
-static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
-static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
-
-constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
-static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
-static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
-
-constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
-static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
-static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
-
-constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
-static_assert(r512lo_mask[0] == -1.0f, "");
-static_assert(r512lo_mask[1] == +2.0f, "");
-static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
-
-constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b_ss = { -1.0, 42.0 };
-constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
-static_assert(r_ss[0] == -1.0f, "");
-static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
-constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
-static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
-constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
-static_assert(r_ss_mask_off[0] == 9.0f, "");
-constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
-static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
+static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+static_assert(r[0] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
}
-namespace InexactOrSpecialReject {
-constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
-static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
+static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
+static_assert(r[0] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
-constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
-static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
+}
+}
-constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
-static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at -3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at -3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
-constexpr __m128d dsub = { 1e-310, 0.0 };
-constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
-static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at -3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at -3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
+}
+}
-constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
-static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
+static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
}
-namespace MaskedSpecialCasesAllowed {
-constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
-static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note at -4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note at -4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
-constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
-static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
+static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInf {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note at -4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
+// So even masked-off Inf/NaN values cause errors (architectural limitation)
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note at -4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
-constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
-constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
-static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
-constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
-static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
+static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
+static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at -3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at -3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
-constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
-constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
-static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at -3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at -3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
-constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
-static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_cvtpd_pslo(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
+}
+}
-constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
-static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note at -4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note at -4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
}
>From 4a2f59bee574efec48ac87e74dae356dc72fb2ae Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:09:57 +0200
Subject: [PATCH 05/24] fully implemented features in ExprConstant visiting
logic
---
clang/lib/AST/ExprConstant.cpp | 143 +++++++++++++++++++++++++++++++++
1 file changed, 143 insertions(+)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..065d5c2e33a9c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12165,7 +12165,37 @@ static bool evalShuffleGeneric(
Out = APValue(ResultElements.data(), ResultElements.size());
return true;
}
+static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
+ APFloat OrigVal, APValue &Result) {
+ if (OrigVal.isInfinity()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
+ return false;
+ }
+ if (OrigVal.isNaN()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1;
+ return false;
+ }
+
+ APFloat Val = OrigVal;
+ bool LosesInfo = false;
+ APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
+ APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+
+ if(LosesInfo || Val.isDenormal()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
+ return false;
+ }
+
+ if(Status != APFloat::opOK) {
+ Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
+ return false;
+ }
+
+ Result = APValue(Val);
+ return true;
+}
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12878,6 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case X86::BI__builtin_ia32_cvtsd2ss: {
+ APValue VecA, VecB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB))
+ return false;
+
+ SmallVector<APValue, 4> Elements;
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+ APValue VecA, VecB, VecSrc, MaskValue;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+ return false;
+
+ unsigned Mask = MaskValue.getInt().getZExtValue();
+ SmallVector<APValue, 4> Elements;
+
+
+ if (Mask & 1) {
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+ return false;
+ Elements.push_back(ResultVal);
+ } else {
+ Elements.push_back(VecSrc.getVectorElt(0));
+ }
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+
+
+ const auto BuiltinID = E->getBuiltinCallee();
+ bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+ BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+
+ APValue InputValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+ return false;
+
+ APValue MergeValue;
+ unsigned Mask = 0xFFFFFFFF;
+ bool NeedsMerge = false;
+ if (IsMasked) {
+ APValue MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+ return false;
+ Mask = MaskValue.getInt().getZExtValue();
+ auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (!((Mask >> I) & 1)) {
+ NeedsMerge = true;
+ break;
+ }
+ }
+ if (NeedsMerge) {
+ if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+ return false;
+ }
+ }
+
+ unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+ unsigned NumEltsInput = InputValue.getVectorLength();
+ SmallVector<APValue, 8> Elements;
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (IsMasked && !((Mask >> I) & 1)) {
+ if (!NeedsMerge) {
+ return false;
+ }
+ Elements.push_back(MergeValue.getVectorElt(I));
+ continue;
+ }
+
+ if (I >= NumEltsInput) {
+ Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+ continue;
+ }
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+ }
+ return Success(Elements, E);
+ }
+
+
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
>From 0fb3292fe860e30de61d2df3a90912f27f04f143 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:21:17 +0200
Subject: [PATCH 06/24] Ran the git clang-format command
---
clang/lib/AST/ExprConstant.cpp | 192 ++++++++++++++++-----------------
1 file changed, 96 insertions(+), 96 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 065d5c2e33a9c..6f512dd538e7d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12169,7 +12169,7 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
APFloat OrigVal, APValue &Result) {
if (OrigVal.isInfinity()) {
- Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
return false;
}
if (OrigVal.isNaN()) {
@@ -12177,18 +12177,17 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
return false;
}
- APFloat Val = OrigVal;
+ APFloat Val = OrigVal;
bool LosesInfo = false;
- APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
- APFloat::rmNearestTiesToEven,
- &LosesInfo);
+ APFloat::opStatus Status = Val.convert(
+ APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
- if(LosesInfo || Val.isDenormal()) {
+ if (LosesInfo || Val.isDenormal()) {
Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
return false;
}
- if(Status != APFloat::opOK) {
+ if (Status != APFloat::opOK) {
Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -12909,118 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_cvtsd2ss: {
- APValue VecA, VecB;
- if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
- !EvaluateAsRValue(Info, E->getArg(1), VecB))
- return false;
+ case X86::BI__builtin_ia32_cvtsd2ss: {
+ APValue VecA, VecB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB))
+ return false;
- SmallVector<APValue, 4> Elements;
+ SmallVector<APValue, 4> Elements;
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
- return false;
-
- Elements.push_back(ResultVal);
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
- unsigned NumEltsA = VecA.getVectorLength();
- for (unsigned I = 1; I < NumEltsA; ++I) {
- Elements.push_back(VecA.getVectorElt(I));
- }
+ Elements.push_back(ResultVal);
- return Success(Elements, E);
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
}
- case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
- APValue VecA, VecB, VecSrc, MaskValue;
-
- if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
- !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
- !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
- !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
- return false;
- unsigned Mask = MaskValue.getInt().getZExtValue();
- SmallVector<APValue, 4> Elements;
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+ APValue VecA, VecB, VecSrc, MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+ return false;
- if (Mask & 1) {
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
- return false;
- Elements.push_back(ResultVal);
- } else {
- Elements.push_back(VecSrc.getVectorElt(0));
- }
+ unsigned Mask = MaskValue.getInt().getZExtValue();
+ SmallVector<APValue, 4> Elements;
- unsigned NumEltsA = VecA.getVectorLength();
- for (unsigned I = 1; I < NumEltsA; ++I) {
- Elements.push_back(VecA.getVectorElt(I));
- }
+ if (Mask & 1) {
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
+ Elements.push_back(ResultVal);
+ } else {
+ Elements.push_back(VecSrc.getVectorElt(0));
+ }
- return Success(Elements, E);
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
}
- case X86::BI__builtin_ia32_cvtpd2ps:
- case X86::BI__builtin_ia32_cvtpd2ps256:
- case X86::BI__builtin_ia32_cvtpd2ps_mask:
- case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
- const auto BuiltinID = E->getBuiltinCallee();
- bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
- BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+ const auto BuiltinID = E->getBuiltinCallee();
+ bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+ BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
- APValue InputValue;
- if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
- return false;
-
- APValue MergeValue;
- unsigned Mask = 0xFFFFFFFF;
- bool NeedsMerge = false;
- if (IsMasked) {
- APValue MaskValue;
- if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
- return false;
- Mask = MaskValue.getInt().getZExtValue();
- auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
- for (unsigned I = 0; I < NumEltsResult; ++I) {
- if (!((Mask >> I) & 1)) {
- NeedsMerge = true;
- break;
- }
- }
- if (NeedsMerge) {
- if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
- return false;
- }
- }
+ APValue InputValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+ return false;
- unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
- unsigned NumEltsInput = InputValue.getVectorLength();
- SmallVector<APValue, 8> Elements;
+ APValue MergeValue;
+ unsigned Mask = 0xFFFFFFFF;
+ bool NeedsMerge = false;
+ if (IsMasked) {
+ APValue MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+ return false;
+ Mask = MaskValue.getInt().getZExtValue();
+ auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
for (unsigned I = 0; I < NumEltsResult; ++I) {
- if (IsMasked && !((Mask >> I) & 1)) {
- if (!NeedsMerge) {
- return false;
- }
- Elements.push_back(MergeValue.getVectorElt(I));
- continue;
+ if (!((Mask >> I) & 1)) {
+ NeedsMerge = true;
+ break;
}
+ }
+ if (NeedsMerge) {
+ if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+ return false;
+ }
+ }
- if (I >= NumEltsInput) {
- Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
- continue;
+ unsigned NumEltsResult =
+ E->getType()->getAs<VectorType>()->getNumElements();
+ unsigned NumEltsInput = InputValue.getVectorLength();
+ SmallVector<APValue, 8> Elements;
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (IsMasked && !((Mask >> I) & 1)) {
+ if (!NeedsMerge) {
+ return false;
}
+ Elements.push_back(MergeValue.getVectorElt(I));
+ continue;
+ }
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
- return false;
-
- Elements.push_back(ResultVal);
+ if (I >= NumEltsInput) {
+ Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+ continue;
}
- return Success(Elements, E);
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(
+ Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
}
+ return Success(Elements, E);
+ }
-
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
>From 75c76719bfe4116e79140388fd52fa47df8da96b Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 06:44:04 +0200
Subject: [PATCH 07/24] removed constexpr from _mm512_undefined_ps
---
clang/lib/Headers/avx512fintrin.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 63031c2fcfd82..85d54bc8eff8c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void) {
return (__m512)__builtin_ia32_undef512();
}
>From 04dabc03228514825a07fa3648e3d8a646cdc33c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 11:07:51 +0200
Subject: [PATCH 08/24] added constexpr to __builtin_ia32_undef, and updated
BuiltinsX86.td
---
clang/include/clang/Basic/BuiltinsX86.td | 37 ++++++++++++++++--------
clang/lib/AST/ExprConstant.cpp | 24 +++++++++++++++
clang/lib/Headers/avx512fintrin.h | 3 +-
clang/lib/Headers/xmmintrin.h | 2 +-
4 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index df6ec01959bd4..097e980989941 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">;
// Undefined Values
def undef128 : X86Builtin<"_Vector<2, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
+ let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>];
}
-def undef256 : X86Builtin<"_Vector<4, double>()" > {
- let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()"> {
+ let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>];
}
def undef512 : X86Builtin<"_Vector<8, double>()"> {
- let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
+ let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>];
}
// FLAGS
@@ -167,13 +167,19 @@ let Features = "sse2", Attributes = [NoThrow] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
+ def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
+}
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
- def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">;
- def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
}
@@ -463,10 +469,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
}
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
- def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
@@ -474,7 +483,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-
foreach Op = ["max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
@@ -1005,6 +1013,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
}
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
+}
+
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1017,7 +1029,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto
def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
- def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
}
@@ -1453,8 +1464,11 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
}
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
- def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
+}
+
+let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+ def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
}
@@ -3287,8 +3301,7 @@ let Features = "avx512bw,avx512vl",
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
- def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
+let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ee82398d7ac2a..0868237d52404 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12954,6 +12954,30 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_undef128:
+ case X86::BI__builtin_ia32_undef256:
+ case X86::BI__builtin_ia32_undef512: {
+ // Undefined builtins return zero-initialized vectors in constexpr contexts
+ const auto *VTy = E->getType()->castAs<VectorType>();
+ unsigned NumElts = VTy->getNumElements();
+ QualType EltTy = VTy->getElementType();
+
+ SmallVector<APValue, 16> Elements;
+ Elements.reserve(NumElts);
+
+ if (EltTy->isIntegerType()) {
+ APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType());
+ for (unsigned I = 0; I < NumElts; ++I)
+ Elements.push_back(APValue(Zero));
+ } else {
+ APFloat Zero(Info.Ctx.getFloatTypeSemantics(EltTy));
+ for (unsigned I = 0; I < NumElts; ++I)
+ Elements.push_back(APValue(Zero));
+ }
+
+ return Success(APValue(Elements.data(), Elements.size()), E);
+ }
+
case X86::BI__builtin_ia32_cvtsd2ss: {
APValue VecA, VecB;
if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 85d54bc8eff8c..9dcc4bea24a37 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,8 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_undefined_ps(void) {
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) {
return (__m512)__builtin_ia32_undef512();
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 72a643948bed6..b6487bed9facb 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 128-bit vector of [4 x float] containing undefined values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_undefined_ps(void)
{
return (__m128)__builtin_ia32_undef128();
>From 880b06029a68e75722326ebd62afb153b5724664 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 12:22:01 +0200
Subject: [PATCH 09/24] Removed undef as constexpr and used _mm256_setzero_ps
instead
---
clang/include/clang/Basic/BuiltinsX86.td | 6 +++---
clang/lib/AST/ExprConstant.cpp | 23 -----------------------
clang/lib/Headers/avx512fintrin.h | 4 ++--
clang/lib/Headers/avxintrin.h | 2 +-
clang/lib/Headers/xmmintrin.h | 2 +-
5 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 097e980989941..9754f839fc803 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -21,15 +21,15 @@ def rdtscp : X86Builtin<"unsigned long long int(unsigned int*)">;
// Undefined Values
def undef128 : X86Builtin<"_Vector<2, double>()"> {
- let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<128>];
+ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
}
def undef256 : X86Builtin<"_Vector<4, double>()"> {
- let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>];
+ let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
}
def undef512 : X86Builtin<"_Vector<8, double>()"> {
- let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<512>];
+ let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
}
// FLAGS
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 0868237d52404..c0a719e578332 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12954,29 +12954,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_undef128:
- case X86::BI__builtin_ia32_undef256:
- case X86::BI__builtin_ia32_undef512: {
- // Undefined builtins return zero-initialized vectors in constexpr contexts
- const auto *VTy = E->getType()->castAs<VectorType>();
- unsigned NumElts = VTy->getNumElements();
- QualType EltTy = VTy->getElementType();
-
- SmallVector<APValue, 16> Elements;
- Elements.reserve(NumElts);
-
- if (EltTy->isIntegerType()) {
- APSInt Zero(Info.Ctx.getTypeSize(EltTy), EltTy->isUnsignedIntegerType());
- for (unsigned I = 0; I < NumElts; ++I)
- Elements.push_back(APValue(Zero));
- } else {
- APFloat Zero(Info.Ctx.getFloatTypeSemantics(EltTy));
- for (unsigned I = 0; I < NumElts; ++I)
- Elements.push_back(APValue(Zero));
- }
-
- return Success(APValue(Elements.data(), Elements.size()), E);
- }
case X86::BI__builtin_ia32_cvtsd2ss: {
APValue VecA, VecB;
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9dcc4bea24a37..edcbdba908522 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_undefined_ps(void) {
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) {
return (__m512)__builtin_ia32_undef512();
}
@@ -3490,7 +3490,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps(__m512d __A) {
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
- (__v8sf) _mm256_undefined_ps (),
+ (__v8sf) _mm256_setzero_ps (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 605e70307cfc9..126ba30bcca7e 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -3605,7 +3605,7 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_undefined_ps(void) {
return (__m256)__builtin_ia32_undef256();
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index b6487bed9facb..72a643948bed6 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1892,7 +1892,7 @@ _mm_loadr_ps(const float *__p)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 128-bit vector of [4 x float] containing undefined values.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_undefined_ps(void)
{
return (__m128)__builtin_ia32_undef128();
>From 4efe60af5e75b7c51320b66a3bc764a34b757df3 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 12:41:57 +0200
Subject: [PATCH 10/24] Implemented InterpBuiltin cpp implementations
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 176 +++++++++++++++++++++++
1 file changed, 176 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8496b58105c7a..ad49eb14b911f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -167,6 +167,37 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
return Result;
}
+// Narrows a floating-point value to IEEE single precision for the X86
+// cvtpd2ps/cvtsd2ss builtins, succeeding only when the result is exact.
+//
+// Evaluation fails (returning false, and emitting a note on DiagExpr when S
+// is diagnosing) if Src is infinite or NaN, if the round-to-nearest-even
+// conversion loses information or yields a denormal float, or if the
+// conversion reports any status other than opOK. On success the converted
+// value is copied into Dst and true is returned.
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
+                                       InterpState &S, const Expr *DiagExpr) {
+  // Infinity and NaN use the same note; the streamed operand is 0 for an
+  // infinity and 1 for a NaN.
+  const bool SrcIsInf = Src.isInfinity();
+  if (SrcIsInf || Src.isNaN()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic)
+          << (SrcIsInf ? 0 : 1);
+    return false;
+  }
+
+  // Src is a by-value copy, so it can be narrowed in place.
+  bool Inexact = false;
+  const APFloat::opStatus ConvStatus = Src.convert(
+      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Inexact);
+
+  // A lossy or denormal result is reported before any other status problem.
+  if (Inexact || Src.isDenormal()) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
+    return false;
+  }
+  if (ConvStatus != APFloat::opOK) {
+    if (S.diagnosing())
+      S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr);
+    return false;
+  }
+
+  Dst.copy(Src);
+  return true;
+}
+
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
@@ -3359,6 +3390,140 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
pushInteger(S, RetMask, Call->getType());
return true;
}
+// Constant-evaluates __builtin_ia32_cvtsd2ss(a, b).
+//
+// The result vector is a copy of the float vector `a`, except that lane 0 is
+// replaced by lane 0 of `b` narrowed through convertDoubleToFloatStrict.
+// Returns false if either operand fails CheckLoad or the conversion is
+// rejected.
+static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
+                                          const CallExpr *Call) {
+  assert(Call->getNumArgs() == 2);
+
+  // Operands come off the stack last-to-first: `b` first, then `a`.
+  const Pointer &DoubleVec = S.Stk.pop<Pointer>();
+  const Pointer &FloatVec = S.Stk.pop<Pointer>();
+  if (!CheckLoad(S, OpPC, FloatVec))
+    return false;
+  if (!CheckLoad(S, OpPC, DoubleVec))
+    return false;
+
+  const unsigned Lanes =
+      Call->getType()->castAs<VectorType>()->getNumElements();
+  const Pointer &Result = S.Stk.peek<Pointer>();
+
+  // Start from `a`; only the low lane is overwritten afterwards.
+  for (unsigned Lane = 0; Lane < Lanes; ++Lane)
+    Result.elem<Floating>(Lane) = FloatVec.elem<Floating>(Lane);
+
+  // Narrow the low lane of `b` into freshly allocated float storage.
+  const auto &Ctx = S.getASTContext();
+  Floating Narrowed = S.allocFloat(Ctx.getFloatTypeSemantics(Ctx.FloatTy));
+  if (!convertDoubleToFloatStrict(DoubleVec.elem<Floating>(0).getAPFloat(),
+                                  Narrowed, S, Call))
+    return false;
+  Result.elem<Floating>(0) = Narrowed;
+
+  Result.initializeAllElements();
+  return true;
+}
+
+// Constant-evaluates __builtin_ia32_cvtsd2ss_round_mask(a, b, src, mask,
+// rounding).
+//
+// The result vector is a copy of `a` with lane 0 replaced: when bit 0 of
+// `mask` is set, by lane 0 of `b` narrowed through
+// convertDoubleToFloatStrict; otherwise by lane 0 of `src`. The rounding
+// immediate is popped from the stack but not otherwise consulted. Returns
+// false if any vector operand fails CheckLoad or the conversion is rejected.
+static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
+                                                     CodePtr OpPC,
+                                                     const CallExpr *Call) {
+  assert(Call->getNumArgs() == 5);
+
+  // Operands come off the stack last-to-first.
+  [[maybe_unused]] const APSInt RoundingImm =
+      popToAPSInt(S, Call->getArg(4)->getType());
+  const APSInt WriteMask = popToAPSInt(S, Call->getArg(3)->getType());
+  const Pointer &Fallback = S.Stk.pop<Pointer>();
+  const Pointer &DoubleVec = S.Stk.pop<Pointer>();
+  const Pointer &FloatVec = S.Stk.pop<Pointer>();
+  if (!CheckLoad(S, OpPC, FloatVec))
+    return false;
+  if (!CheckLoad(S, OpPC, DoubleVec))
+    return false;
+  if (!CheckLoad(S, OpPC, Fallback))
+    return false;
+
+  const unsigned Lanes =
+      Call->getType()->castAs<VectorType>()->getNumElements();
+  const Pointer &Result = S.Stk.peek<Pointer>();
+
+  // Start from `a`; only the low lane is overwritten afterwards.
+  for (unsigned Lane = 0; Lane < Lanes; ++Lane)
+    Result.elem<Floating>(Lane) = FloatVec.elem<Floating>(Lane);
+
+  const bool ConvertLow = (WriteMask.getZExtValue() & 0x1) != 0;
+  if (!ConvertLow) {
+    // Mask bit clear: the low lane comes from `src` and nothing is converted.
+    Result.elem<Floating>(0) = Fallback.elem<Floating>(0);
+  } else {
+    const auto &Ctx = S.getASTContext();
+    Floating Narrowed = S.allocFloat(Ctx.getFloatTypeSemantics(Ctx.FloatTy));
+    if (!convertDoubleToFloatStrict(DoubleVec.elem<Floating>(0).getAPFloat(),
+                                    Narrowed, S, Call))
+      return false;
+    Result.elem<Floating>(0) = Narrowed;
+  }
+
+  Result.initializeAllElements();
+  return true;
+}
+
+// Constant-evaluates the packed double -> float conversion builtins
+// cvtpd2ps, cvtpd2ps256, cvtpd2ps_mask and cvtpd2ps512_mask.
+//
+// Operand layout, as popped below: the unmasked forms take only the source
+// vector; the masked forms take (source, passthrough, mask), and the 512-bit
+// masked form additionally carries a trailing rounding immediate, which is
+// popped but not otherwise consulted here.
+//
+// Every result lane starts as the passthrough lane (masked forms) or 0.0f
+// (unmasked forms). Each source lane whose mask bit is set -- or every source
+// lane when unmasked -- is then overwritten with its strict conversion.
+// Returns false if an operand fails CheckLoad or a conversion is rejected.
+//
+// NOTE(review): for the masked forms, result lanes at or beyond the source
+// lane count always keep the passthrough value here, whereas the
+// ExprConstant.cpp hunk earlier in this series zero-fills such a lane when
+// its mask bit is set. Confirm the two evaluators are meant to agree.
+static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
+                                          const CallExpr *Call,
+                                          unsigned BuiltinID) {
+  const bool HasRounding =
+      BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask;
+  const bool IsMasked =
+      HasRounding || BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask;
+
+  APSInt MaskVal(1, false);
+  Pointer PassThrough;
+  APSInt Rounding;
+
+  // Operands come off the stack last-to-first; the source is always last.
+  if (IsMasked) {
+    if (HasRounding)
+      Rounding = popToAPSInt(S, Call->getArg(3)->getType());
+    MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+    PassThrough = S.Stk.pop<Pointer>();
+  }
+  Pointer SrcPd = S.Stk.pop<Pointer>();
+
+  if (IsMasked && !CheckLoad(S, OpPC, PassThrough))
+    return false;
+  if (!CheckLoad(S, OpPC, SrcPd))
+    return false;
+
+  const unsigned RetElems =
+      Call->getType()->castAs<VectorType>()->getNumElements();
+  const unsigned SrcElems = SrcPd.getNumElems();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  // Seed every result lane before any conversion happens.
+  if (IsMasked) {
+    for (unsigned Lane = 0; Lane < RetElems; ++Lane)
+      Dst.elem<Floating>(Lane) = PassThrough.elem<Floating>(Lane);
+  } else {
+    for (unsigned Lane = 0; Lane < RetElems; ++Lane)
+      Dst.elem<Floating>(Lane) = Floating(APFloat(0.0f));
+  }
+
+  // Only lanes that exist in the source are candidates for conversion.
+  const auto MaskBits = MaskVal.getZExtValue();
+  const auto &Ctx = S.getASTContext();
+  for (unsigned Lane = 0; Lane < SrcElems; ++Lane) {
+    const bool LaneEnabled = !IsMasked || ((MaskBits >> Lane) & 0x1) != 0;
+    if (!LaneEnabled)
+      continue;
+
+    APFloat SrcD = SrcPd.elem<Floating>(Lane).getAPFloat();
+    Floating Conv = S.allocFloat(Ctx.getFloatTypeSemantics(Ctx.FloatTy));
+    if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+      return false;
+    Dst.elem<Floating>(Lane) = Conv;
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
@@ -5169,6 +5334,17 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cvtq2mask512:
return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cvtsd2ss:
+ return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call);
+
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
+ return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask:
+ return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, BuiltinID);
+
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
>From d5084f7beedd37a6cec81558b2c00224dbc5d8d6 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 13:14:03 +0200
Subject: [PATCH 11/24] styled The tests
---
clang/test/CodeGen/X86/avx-builtins.c | 2 +
clang/test/CodeGen/X86/avx512f-builtins.c | 12 +
clang/test/CodeGen/X86/avx512vl-builtins.c | 8 +
clang/test/CodeGen/X86/sse2-builtins.c | 4 +
.../SemaCXX/constexpr-x86-avx-builtins.cpp | 18 +
.../constexpr-x86-avx512f-builtins.cpp | 230 +++++++++
.../constexpr-x86-avx512vl-builtins.cpp | 120 +++++
.../constexpr-x86-intrinsics-pd2ps.cpp | 479 ------------------
.../SemaCXX/constexpr-x86-sse2-builtins.cpp | 79 +++
9 files changed, 473 insertions(+), 479 deletions(-)
create mode 100644 clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
delete mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
create mode 100644 clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 00bcf9cc1da58..13da4292c5b92 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -968,6 +968,8 @@ __m128 test_mm256_cvtpd_ps(__m256d A) {
return _mm256_cvtpd_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps((__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
+
__m256i test_mm256_cvtps_epi32(__m256 A) {
// CHECK-LABEL: test_mm256_cvtps_epi32
// CHECK: call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %{{.*}})
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 6401a0e55a83b..499cbd9dee30a 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10615,6 +10615,8 @@ __m256 test_mm512_cvtpd_ps (__m512d __A)
return _mm512_cvtpd_ps (__A);
}
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f));
+
__m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
// CHECK-LABEL: test_mm512_mask_cvtpd_ps
@@ -10622,6 +10624,8 @@ __m256 test_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
return _mm512_mask_cvtpd_ps (__W,__U,__A);
}
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps((__m256){ 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f }, 0x05, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f));
+
__m512 test_mm512_cvtpd_pslo(__m512d __A)
{
// CHECK-LABEL: test_mm512_cvtpd_pslo
@@ -10631,6 +10635,8 @@ __m512 test_mm512_cvtpd_pslo(__m512d __A)
return _mm512_cvtpd_pslo(__A);
}
+TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo((__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
__m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_cvtpd_pslo
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps.512
@@ -10639,6 +10645,8 @@ __m512 test_mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A) {
return _mm512_mask_cvtpd_pslo(__W, __U, __A);
}
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo((__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f, 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f }, 0x3, (__m512d){ -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 }), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+
__m256 test_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
// CHECK-LABEL: test_mm512_maskz_cvtpd_ps
@@ -11860,12 +11868,16 @@ __m128 test_mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
return _mm_mask_cvtsd_ss(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, 0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
__m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_cvtsd_ss
// CHECK: @llvm.x86.avx512.mask.cvtsd2ss.round
return _mm_maskz_cvtsd_ss(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, (__m128){ 1.0f, 2.0f, 3.0f, 4.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 2.0f, 3.0f, 4.0f));
+
__m512i test_mm512_setzero_epi32(void)
{
// CHECK-LABEL: test_mm512_setzero_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 5f6d8360888f5..013c19ba7a929 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -3999,23 +3999,31 @@ __m128 test_mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A) {
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps
return _mm_mask_cvtpd_ps(__W,__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0x3, (__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 9.0f, 9.0f));
__m128 test_mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_maskz_cvtpd_ps
// CHECK: @llvm.x86.avx512.mask.cvtpd2ps
return _mm_maskz_cvtpd_ps(__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, (__m128d){ -1.0, +2.0 }), -1.0f, 0.0f, 0.0f, 0.0f));
__m128 test_mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_mask_cvtpd_ps
// CHECK: @llvm.x86.avx.cvt.pd2.ps.256
// CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
return _mm256_mask_cvtpd_ps(__W,__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps((__m128){ 9.0f, 9.0f, 9.0f, 9.0f }, 0xF, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, -1.0f, +2.0f, +3.5f));
__m128 test_mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: test_mm256_maskz_cvtpd_ps
// CHECK: @llvm.x86.avx.cvt.pd2.ps.256
// CHECK: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
return _mm256_maskz_cvtpd_ps(__U,__A);
}
+
+TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, (__m256d){ 0.0, -1.0, +2.0, +3.5 }), 0.0f, 0.0f, +2.0f, 0.0f));
__m128i test_mm_cvtpd_epu32(__m128d __A) {
// CHECK-LABEL: test_mm_cvtpd_epu32
// CHECK: @llvm.x86.avx512.mask.cvtpd2udq.128
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ed1ac84b8c4a3..c4975b456ba22 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -573,6 +573,8 @@ __m128 test_mm_cvtpd_ps(__m128d A) {
return _mm_cvtpd_ps(A);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps((__m128d){ -1.0, +2.0 }), -1.0f, +2.0f, 0.0f, 0.0f));
+
__m128i test_mm_cvtps_epi32(__m128 A) {
// CHECK-LABEL: test_mm_cvtps_epi32
// CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
@@ -614,6 +616,8 @@ __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
return _mm_cvtsd_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss((__m128){ 9.0f, 5.0f, 6.0f, 7.0f }, (__m128d){ -1.0, 42.0 }), -1.0f, 5.0f, 6.0f, 7.0f));
+
int test_mm_cvtsi128_si32(__m128i A) {
// CHECK-LABEL: test_mm_cvtsi128_si32
// CHECK: extractelement <4 x i32> %{{.*}}, i32 0
diff --git a/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
new file mode 100644
index 0000000000000..724aff3011ded
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx-builtins.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_cvtpd_ps(a), 0.0f, -1.0f, +2.0f, +3.5f));
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
new file mode 100644
index 0000000000000..0d2a82cbbb83c
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
@@ -0,0 +1,230 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtsd_ss(src, 0x0, a, b), 9.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+TEST_CONSTEXPR(match_m128(r, 9.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x1, a, b), -1.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b), 0.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtsd_ss(0x0, a, b_inexact), 0.0f, 2.0f, 3.0f, 4.0f));
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_cvtpd_ps(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f));
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x05, a), -1.0f, 9.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, +64.0f, +128.0f));
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_mask_cvtpd_ps(src, 0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
+
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x81, a), -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, +128.0f));
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0b11011111, a_inexact), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, +64.0f, +128.0f));
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_inf), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+TEST_CONSTEXPR(match_m256(_mm512_maskz_cvtpd_ps(0x1F, a_nan), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
+
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_cvtpd_pslo(a), -1.0f, +2.0f, +4.0f, +8.0f, +16.0f, +32.0f, +64.0f, +128.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+}
+
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, a), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x3, a_inf), -1.0f, +2.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+TEST_CONSTEXPR(match_m512(_mm512_mask_cvtpd_pslo(src, 0x7, a_nan), -1.0f, +2.0f, +4.0f, 9.0f, 9.0f, 9.0f, 9.0f, 9.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
new file mode 100644
index 0000000000000..bdce60a357f13
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx512vl-builtins.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x3, a), -1.0f, +2.0f, 9.0f, 9.0f));
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a), -1.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+TEST_CONSTEXPR(match_m128(_mm_mask_cvtpd_ps(src, 0x1, a_inexact), -1.0f, 9.0f, 9.0f, 9.0f));
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a), -1.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+TEST_CONSTEXPR(match_m128(_mm_maskz_cvtpd_ps(0x1, a_inexact), -1.0f, 0.0f, 0.0f, 0.0f));
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_mask_cvtpd_ps(src, 0xF, a), 0.0f, -1.0f, +2.0f, +3.5f));
+}
+namespace MaskOffInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
+
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+TEST_CONSTEXPR(match_m128(_mm256_maskz_cvtpd_ps(0x5, a), 0.0f, 0.0f, +2.0f, 0.0f));
+}
+namespace MaskOffInf {
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
deleted file mode 100644
index 4a1e9a9c5ae2c..0000000000000
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ /dev/null
@@ -1,479 +0,0 @@
-// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
-
-#define __MM_MALLOC_H
-#include <immintrin.h>
-
-namespace Test_mm_cvtsd_ss {
-namespace OK {
-constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
-}
-namespace Inexact {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
-}
-namespace Inf {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace NaN {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
-}
-namespace Subnormal {
-constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d b = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_cvtsd_ss(a, b);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_mask_cvtsd_ss {
-namespace OK {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
-static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
-}
-namespace MaskOff {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
-static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
-static_assert(r[0] == 9.0f, "");
-}
-namespace MaskOnInexact {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
-}
-namespace MaskOnInf {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
-}
-namespace MaskOnSubnormal {
-constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_sub = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_maskz_cvtsd_ss {
-namespace OK {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
-static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
-}
-namespace MaskOff {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b = { -1.0, 42.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
-static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
-static_assert(r[0] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
-constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_cvtpd_ps {
-namespace OK {
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
-}
-namespace Inexact {
-constexpr __m128d a = { 1.0000000000000002, 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
-}
-namespace Inf {
-constexpr __m128d a = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
-}
-namespace NaN {
-constexpr __m128d a = { __builtin_nan(""), 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
-}
-namespace Subnormal {
-constexpr __m128d a = { 1e-310, 0.0 };
-constexpr __m128 r = _mm_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm_mask_cvtpd_ps {
-namespace OK {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
-}
-namespace Partial {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
-static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
-static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
-}
-namespace MaskOnInexact {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
-}
-namespace MaskOnInf {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
-constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m128d a = { -1.0, +2.0 };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
-static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
-static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
-static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
-}
-namespace MaskOnNaN {
-constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
-constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm256_cvtpd_ps {
-namespace OK {
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_cvtpd_ps(a);
-static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
-static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
-}
-namespace Inexact {
-constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
-constexpr __m128 r = _mm256_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm256_mask_cvtpd_ps {
-namespace OK {
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
-static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, "");
-}
-namespace MaskOffInf {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-}
-namespace MaskOffNaN {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
-constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm256_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
-static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, "");
-}
-namespace MaskOffInf {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
-}
-namespace MaskOffNaN {
-// Note: 256-bit masked operations use selectps, which evaluates ALL lanes before masking
-// So even masked-off Inf/NaN values cause errors (architectural limitation)
-constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
-constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
-}
-}
-
-namespace Test_mm512_cvtpd_ps {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_cvtpd_ps(a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-}
-namespace Inexact {
-constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
-constexpr __m256 r = _mm512_cvtpd_ps(a);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
-// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
-}
-}
-
-namespace Test_mm512_mask_cvtpd_ps {
-namespace OK {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
-static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
-static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
-static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
-
-namespace Test_mm512_maskz_cvtpd_ps {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
-}
-namespace MaskOffInexact {
-constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
-static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
-static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
-
-namespace Test_mm512_cvtpd_pslo {
-namespace OK {
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_cvtpd_pslo(a);
-static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
-static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
-}
-}
-
-namespace Test_mm512_mask_cvtpd_pslo {
-namespace OK {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
-static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
-static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
-}
-namespace MaskOffInf {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
-static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
-}
-namespace MaskOffNaN {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
-static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, "");
-}
-namespace MaskOnInf {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
-// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-namespace MaskOnNaN {
-constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
-// expected-error@-1 {{must be initialized by a constant expression}}
-// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
-// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
-}
-}
diff --git a/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp
new file mode 100644
index 0000000000000..319a3b02a94f9
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-sse2-builtins.cpp
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +sse2 -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+TEST_CONSTEXPR(match_m128(_mm_cvtsd_ss(a, b), -1.0f, 5.0f, 6.0f, 7.0f));
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+TEST_CONSTEXPR(match_m128(_mm_cvtpd_ps(a), -1.0f, +2.0f, 0.0f, 0.0f));
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
+}
+}
>From 51d213d48fcb7d8bf72d835aeda051e94653dc86 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 13:46:38 +0200
Subject: [PATCH 12/24] ran the format commands
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 30 ++++++++++++++----------
clang/lib/AST/ExprConstant.cpp | 1 -
clang/lib/Headers/avx512fintrin.h | 7 +++---
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ad49eb14b911f..5e8b8e0e31bb6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -169,8 +169,8 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
-static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &S,
- const Expr *DiagExpr) {
+static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
+ InterpState &S, const Expr *DiagExpr) {
if (Src.isInfinity()) {
if (S.diagnosing())
S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
@@ -183,7 +183,8 @@ static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst, InterpState &
}
APFloat Val = Src;
bool LosesInfo = false;
- APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+ APFloat::opStatus Status = Val.convert(
+ APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
if (LosesInfo || Val.isDenormal()) {
if (S.diagnosing())
S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
@@ -3391,7 +3392,7 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
return true;
}
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
+ const CallExpr *Call) {
assert(Call->getNumArgs() == 2);
const Pointer &B = S.Stk.pop<Pointer>();
@@ -3420,8 +3421,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
}
static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
- CodePtr OpPC,
- const CallExpr *Call) {
+ CodePtr OpPC,
+ const CallExpr *Call) {
assert(Call->getNumArgs() == 5);
// Pop in reverse order: rounding, mask, src, b, a
@@ -3430,7 +3431,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
const Pointer &Src = S.Stk.pop<Pointer>();
const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
- if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) || !CheckLoad(S, OpPC, Src))
+ if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
+ !CheckLoad(S, OpPC, Src))
return false;
const auto *DstVTy = Call->getType()->castAs<VectorType>();
@@ -3441,7 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
for (unsigned I = 0; I != NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
- // If mask bit 0 is set, convert element 0 from double to float; otherwise use Src
+ // If mask bit 0 is set, convert element 0 from double to float; otherwise use
+ // Src
if (MaskInt.getZExtValue() & 0x1) {
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
@@ -3463,12 +3466,12 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
-
+
APSInt MaskVal(1, false);
Pointer PassThrough;
Pointer SrcPd;
APSInt Rounding;
-
+
if (IsMasked) {
// Pop in reverse order
if (HasRounding) {
@@ -3483,14 +3486,14 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
PassThrough = S.Stk.pop<Pointer>();
SrcPd = S.Stk.pop<Pointer>();
}
-
+
if (!CheckLoad(S, OpPC, PassThrough))
return false;
} else {
// Pop source only
SrcPd = S.Stk.pop<Pointer>();
}
-
+
if (!CheckLoad(S, OpPC, SrcPd))
return false;
@@ -3508,7 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
}
}
- // Convert double to float for enabled elements (only process source elements that exist)
+ // Convert double to float for enabled elements (only process source elements
+ // that exist)
for (unsigned I = 0; I != SrcElems; ++I) {
if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0))
continue;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3778b0af80b5a..0a9776cb9ee87 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12951,7 +12951,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
-
case X86::BI__builtin_ia32_cvtsd2ss: {
APValue VecA, VecB;
if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index edcbdba908522..f019fb45e2eea 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3489,10 +3489,9 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps(__m512d __A) {
- return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
- (__v8sf) _mm256_setzero_ps (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m256)__builtin_ia32_cvtpd2ps512_mask(
+ (__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
>From 154eea17cd5f9db2de7a6e366cbda917e867d716 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 14:39:09 +0200
Subject: [PATCH 13/24] Fixed The Formats!
---
clang/lib/Headers/avx512fintrin.h | 8 ++++----
clang/lib/Headers/avxintrin.h | 3 +--
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index f019fb45e2eea..88dfb2fa29878 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_cvtpd_ps(__m512d __A) {
+static __inline__ __m256
+__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
return (__m256)__builtin_ia32_cvtpd2ps512_mask(
(__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION);
@@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A)
((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
-static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
-_mm512_sll_epi32(__m512i __A, __m128i __B) {
+static __inline__ __m512i
+__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 126ba30bcca7e..9b45bc3e56bdb 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -3605,8 +3605,7 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_undefined_ps(void) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void) {
return (__m256)__builtin_ia32_undef256();
}
>From c5ecb01aa231e7fefbb09a5a6d90dff60963426a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 14:48:02 +0200
Subject: [PATCH 14/24] Formatted avx512f header using the project's current
clang-format
---
clang/lib/Headers/avx512fintrin.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 88dfb2fa29878..7dbf137d8cac8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3487,8 +3487,8 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m256
-__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
+static __inline__ __m256
+ __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_cvtpd_ps(__m512d __A) {
return (__m256)__builtin_ia32_cvtpd2ps512_mask(
(__v8df)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION);
@@ -5374,8 +5374,8 @@ _mm512_kmov (__mmask16 __A)
((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
-static __inline__ __m512i
-__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
+static __inline__ __m512i
+ __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sll_epi32(__m512i __A, __m128i __B) {
return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}
>From 28a823a970255496b48629ebad12cd313cfc71a7 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <53662962+HamzaHassanain at users.noreply.github.com>
Date: Mon, 1 Dec 2025 17:55:38 +0200
Subject: [PATCH 15/24] Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
Co-authored-by: Timm Baeder <tbaeder at redhat.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5e8b8e0e31bb6..819c3f27239b3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3404,7 +3404,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
unsigned NumElems = DstVTy->getNumElements();
const Pointer &Dst = S.Stk.peek<Pointer>();
- // Copy all elements from A to Dst
+ // Copy all elements from A to Dst.
for (unsigned I = 0; I != NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
>From b9b71bd174b4628d4c025c67fc6ca1c5a030ae73 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 22:45:01 +0200
Subject: [PATCH 16/24] Did the Requested Changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 70 +++++++++++-------------
1 file changed, 32 insertions(+), 38 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 5e8b8e0e31bb6..32238c877aad9 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3404,13 +3404,13 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
unsigned NumElems = DstVTy->getNumElements();
const Pointer &Dst = S.Stk.peek<Pointer>();
- // Copy all elements from A to Dst
- for (unsigned I = 0; I != NumElems; ++I)
+ // Copy all elements except lane 0 (overwritten below) from A to Dst.
+ for (unsigned I = 1; I < NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
- // Convert element 0 from double to float
+ // Convert element 0 from double to float.
Floating Conv = S.allocFloat(
- S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
+ S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
APFloat SrcD = B.elem<Floating>(0).getAPFloat();
if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
return false;
@@ -3425,9 +3425,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
const CallExpr *Call) {
assert(Call->getNumArgs() == 5);
- // Pop in reverse order: rounding, mask, src, b, a
- APSInt Rounding = popToAPSInt(S, Call->getArg(4)->getType());
- APSInt MaskInt = popToAPSInt(S, Call->getArg(3)->getType());
+ APSInt Rounding = popToAPSInt(S, Call->getArg(4));
+ APSInt MaskInt = popToAPSInt(S, Call->getArg(3));
const Pointer &Src = S.Stk.pop<Pointer>();
const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
@@ -3439,17 +3438,17 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
unsigned NumElems = DstVTy->getNumElements();
const Pointer &Dst = S.Stk.peek<Pointer>();
- // Copy all elements from A to Dst
- for (unsigned I = 0; I != NumElems; ++I)
+ // Copy all elements except lane 0 (overwritten below) from A to Dst.
+ for (unsigned I = 1; I < NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
// If mask bit 0 is set, convert element 0 from double to float; otherwise use
- // Src
+ // Src.
if (MaskInt.getZExtValue() & 0x1) {
Floating Conv = S.allocFloat(
- S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
- APFloat SrcD = B.elem<Floating>(0).getAPFloat();
- if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+ S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
+ APFloat Src = B.elem<Floating>(0).getAPFloat();
+ if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
return false;
Dst.elem<Floating>(0) = Conv;
} else {
@@ -3467,60 +3466,55 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
- APSInt MaskVal(1, false);
+ APSInt MaskVal;
Pointer PassThrough;
- Pointer SrcPd;
+ Pointer Src;
APSInt Rounding;
if (IsMasked) {
- // Pop in reverse order
+ // Pop in reverse order.
if (HasRounding) {
- // For 512: rounding, mask, passthrough, source
- Rounding = popToAPSInt(S, Call->getArg(3)->getType());
- MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+ Rounding = popToAPSInt(S, Call->getArg(3));
+ MaskVal = popToAPSInt(S, Call->getArg(2));
PassThrough = S.Stk.pop<Pointer>();
- SrcPd = S.Stk.pop<Pointer>();
+ Src = S.Stk.pop<Pointer>();
} else {
- // For VL: mask, passthrough, source
MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
PassThrough = S.Stk.pop<Pointer>();
- SrcPd = S.Stk.pop<Pointer>();
+ Src = S.Stk.pop<Pointer>();
}
if (!CheckLoad(S, OpPC, PassThrough))
return false;
} else {
- // Pop source only
- SrcPd = S.Stk.pop<Pointer>();
+ // Pop source only.
+ Src = S.Stk.pop<Pointer>();
}
- if (!CheckLoad(S, OpPC, SrcPd))
+ if (!CheckLoad(S, OpPC, Src))
return false;
const auto *RetVTy = Call->getType()->castAs<VectorType>();
unsigned RetElems = RetVTy->getNumElements();
- unsigned SrcElems = SrcPd.getNumElems();
+ unsigned SrcElems = Src.getNumElems();
const Pointer &Dst = S.Stk.peek<Pointer>();
- // Initialize destination with passthrough or zeros
- for (unsigned I = 0; I != RetElems; ++I) {
- if (IsMasked) {
+ // Initialize destination with passthrough or zeros.
+ for (unsigned I = 0; I != RetElems; ++I)
+ if (IsMasked)
Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
- } else {
+ else
Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
- }
- }
- // Convert double to float for enabled elements (only process source elements
- // that exist)
+ // Convert double to float for enabled elements (only process source elements that exist).
for (unsigned I = 0; I != SrcElems; ++I) {
- if (IsMasked && (((MaskVal.getZExtValue() >> I) & 0x1) == 0))
+ if (IsMasked && !MaskVal[I])
continue;
- APFloat SrcD = SrcPd.elem<Floating>(I).getAPFloat();
+ APFloat Src = Src.elem<Floating>(I).getAPFloat();
Floating Conv = S.allocFloat(
- S.getASTContext().getFloatTypeSemantics(S.getASTContext().FloatTy));
- if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+ S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
+ if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
return false;
Dst.elem<Floating>(I) = Conv;
}
>From 21ab33c3ece7a2daba30e7adc4f6e52672bf2e6a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Mon, 1 Dec 2025 22:45:37 +0200
Subject: [PATCH 17/24] Formatted The InterpBuiltin
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 32238c877aad9..89eae9369eb80 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3506,7 +3506,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
else
Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
- // Convert double to float for enabled elements (only process source elements that exist).
+ // Convert double to float for enabled elements (only process source elements
+ // that exist).
for (unsigned I = 0; I != SrcElems; ++I) {
if (IsMasked && !MaskVal[I])
continue;
>From 4957b30793f4965f3afd24fd96bb0adc4663bb88 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 11:13:19 +0200
Subject: [PATCH 18/24] fixed a naming conflict
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 89eae9369eb80..aabf0b8fc4f03 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3411,8 +3411,8 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
// Convert element 0 from double to float.
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
- APFloat SrcD = B.elem<Floating>(0).getAPFloat();
- if (!convertDoubleToFloatStrict(SrcD, Conv, S, Call))
+ APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
+ if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
return false;
Dst.elem<Floating>(0) = Conv;
@@ -3447,8 +3447,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
if (MaskInt.getZExtValue() & 0x1) {
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
- APFloat Src = B.elem<Floating>(0).getAPFloat();
- if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
+ APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
+ if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
return false;
Dst.elem<Floating>(0) = Conv;
} else {
@@ -3512,10 +3512,10 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
if (IsMasked && !MaskVal[I])
continue;
- APFloat Src = Src.elem<Floating>(I).getAPFloat();
+ APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
- if (!convertDoubleToFloatStrict(Src, Conv, S, Call))
+ if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
return false;
Dst.elem<Floating>(I) = Conv;
}
>From 8b786f02cb2f512969f9b6965f918447f2dd5f8a Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 11:25:07 +0200
Subject: [PATCH 19/24] added assertion on getElementType() and
getASTContext().FloatTy
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index aabf0b8fc4f03..b92454d49bfa8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3445,6 +3445,10 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
// If mask bit 0 is set, convert element 0 from double to float; otherwise use
// Src.
if (MaskInt.getZExtValue() & 0x1) {
+
+ assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
+ "cvtsd2ss requires float element type in destination vector");
+
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
@@ -3506,6 +3510,9 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
else
Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
+ assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
+ "cvtpd2ps requires float element type in return vector");
+
// Convert double to float for enabled elements (only process source elements
// that exist).
for (unsigned I = 0; I != SrcElems; ++I) {
@@ -3513,6 +3520,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
continue;
APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
+
Floating Conv = S.allocFloat(
S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
>From 2bab71ecc6aa5a22c129d678dde6e93ef9ec9e41 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 11:39:33 +0200
Subject: [PATCH 20/24] Ran The formatter Again
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b92454d49bfa8..9a2fe83e98a1e 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3511,8 +3511,8 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
- "cvtpd2ps requires float element type in return vector");
-
+ "cvtpd2ps requires float element type in return vector");
+
// Convert double to float for enabled elements (only process source elements
// that exist).
for (unsigned I = 0; I != SrcElems; ++I) {
>From cc1dadad7570c092019ddbc7d8eb3b308e5cb42e Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 12:33:52 +0200
Subject: [PATCH 21/24] Did the requested changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9a2fe83e98a1e..44c5a66ad3431 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3439,7 +3439,7 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
const Pointer &Dst = S.Stk.peek<Pointer>();
// Copy all elements except lane 0 (overwritten below) from A to Dst.
- for (unsigned I = 1; I < NumElems; ++I)
+ for (unsigned I = 1; I != NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
// If mask bit 0 is set, convert element 0 from double to float; otherwise use
@@ -3483,7 +3483,7 @@ static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
PassThrough = S.Stk.pop<Pointer>();
Src = S.Stk.pop<Pointer>();
} else {
- MaskVal = popToAPSInt(S, Call->getArg(2)->getType());
+ MaskVal = popToAPSInt(S, Call->getArg(2));
PassThrough = S.Stk.pop<Pointer>();
Src = S.Stk.pop<Pointer>();
}
@@ -5346,6 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+
case X86::BI__builtin_ia32_cvtpd2ps:
case X86::BI__builtin_ia32_cvtpd2ps256:
case X86::BI__builtin_ia32_cvtpd2ps_mask:
>From b2b68d9166fdcb0af6e2cc7a3cea82e320036ca3 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <53662962+HamzaHassanain at users.noreply.github.com>
Date: Tue, 2 Dec 2025 12:45:36 +0200
Subject: [PATCH 22/24] Fix loop condition in element copy operation
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 44c5a66ad3431..4e85ba020fe07 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3405,7 +3405,7 @@ static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
// Copy all elements except lane 0 (overwritten below) from A to Dst.
- for (unsigned I = 1; I < NumElems; ++I)
+ for (unsigned I = 1; I != NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
// Convert element 0 from double to float.
>From c430491e46b9477d2fa6bfb9bc0c99b1b211f652 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 16:13:22 +0200
Subject: [PATCH 23/24] Fixed The Whitespace
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 44c5a66ad3431..314b5258f1a4d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5346,7 +5346,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
-
+
case X86::BI__builtin_ia32_cvtpd2ps:
case X86::BI__builtin_ia32_cvtpd2ps256:
case X86::BI__builtin_ia32_cvtpd2ps_mask:
>From 88488aa890ea5e37b06cf186de4701a46b43764e Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Tue, 2 Dec 2025 19:27:58 +0200
Subject: [PATCH 24/24] Did the required changes
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 84 +++++++++---------------
1 file changed, 31 insertions(+), 53 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 6104e331468a0..438c64e919b82 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3392,47 +3392,28 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
return true;
}
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
- const CallExpr *Call) {
- assert(Call->getNumArgs() == 2);
-
- const Pointer &B = S.Stk.pop<Pointer>();
- const Pointer &A = S.Stk.pop<Pointer>();
- if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
- return false;
-
- const auto *DstVTy = Call->getType()->castAs<VectorType>();
- unsigned NumElems = DstVTy->getNumElements();
- const Pointer &Dst = S.Stk.peek<Pointer>();
-
- // Copy all elements except lane 0 (overwritten below) from A to Dst.
- for (unsigned I = 1; I != NumElems; ++I)
- Dst.elem<Floating>(I) = A.elem<Floating>(I);
-
- // Convert element 0 from double to float.
- Floating Conv = S.allocFloat(
- S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
- APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
- if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
- return false;
- Dst.elem<Floating>(0) = Conv;
-
- Dst.initializeAllElements();
- return true;
-}
-
-static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
- CodePtr OpPC,
- const CallExpr *Call) {
- assert(Call->getNumArgs() == 5);
-
- APSInt Rounding = popToAPSInt(S, Call->getArg(4));
- APSInt MaskInt = popToAPSInt(S, Call->getArg(3));
- const Pointer &Src = S.Stk.pop<Pointer>();
- const Pointer &B = S.Stk.pop<Pointer>();
- const Pointer &A = S.Stk.pop<Pointer>();
- if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
- !CheckLoad(S, OpPC, Src))
- return false;
+ const CallExpr *Call, bool HasMask,
+ bool HasRounding) {
+ APSInt Rounding, MaskInt;
+ Pointer Src, B, A;
+
+ if (HasMask) {
+ assert(Call->getNumArgs() == 5);
+ Rounding = popToAPSInt(S, Call->getArg(4));
+ MaskInt = popToAPSInt(S, Call->getArg(3));
+ Src = S.Stk.pop<Pointer>();
+ B = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
+ !CheckLoad(S, OpPC, Src))
+ return false;
+ } else {
+ assert(Call->getNumArgs() == 2);
+ B = S.Stk.pop<Pointer>();
+ A = S.Stk.pop<Pointer>();
+ if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
+ return false;
+ }
const auto *DstVTy = Call->getType()->castAs<VectorType>();
unsigned NumElems = DstVTy->getNumElements();
@@ -3442,10 +3423,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
for (unsigned I = 1; I != NumElems; ++I)
Dst.elem<Floating>(I) = A.elem<Floating>(I);
- // If mask bit 0 is set, convert element 0 from double to float; otherwise use
- // Src.
- if (MaskInt.getZExtValue() & 0x1) {
-
+ // Convert element 0 from double to float, or use Src if masked off.
+ if (!HasMask || (MaskInt.getZExtValue() & 0x1)) {
assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
"cvtsd2ss requires float element type in destination vector");
@@ -3464,11 +3443,8 @@ static bool interp__builtin_ia32_cvtsd2ss_round_mask(InterpState &S,
}
static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
- const CallExpr *Call,
- unsigned BuiltinID) {
- bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
- BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
- bool HasRounding = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+ const CallExpr *Call, bool IsMasked,
+ bool HasRounding) {
APSInt MaskVal;
Pointer PassThrough;
@@ -5342,16 +5318,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
case X86::BI__builtin_ia32_cvtsd2ss:
- return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call);
+ return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false);
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
- return interp__builtin_ia32_cvtsd2ss_round_mask(S, OpPC, Call);
+ return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true);
case X86::BI__builtin_ia32_cvtpd2ps:
case X86::BI__builtin_ia32_cvtpd2ps256:
+ return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false);
case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false);
case X86::BI__builtin_ia32_cvtpd2ps512_mask:
- return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, BuiltinID);
+ return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true);
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
More information about the cfe-commits
mailing list