[clang] [X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (PR #169980)
Hamza Hassanain via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 29 01:24:05 PST 2025
https://github.com/HamzaHassanain created https://github.com/llvm/llvm-project/pull/169980
This patch implements constant evaluation support for the following X86 intrinsics:
- _mm_cvtpd_ps, _mm256_cvtpd_ps (Packed Double to Float)
- _mm_cvtsd_ss (Scalar Double to Float merge)
- Masked variants of the above
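It implements the strict "Exact and Finite" rule: a conversion is rejected in
constexpr contexts if the source value is infinite or NaN, or if it cannot be
represented exactly as a float (including values that underflow). For the
masked variants, only lanes selected by the mask are checked.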
Fixes #169370
>From 29e2794651c50ccf60a28c2e08639913a68cd71c Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 17:05:45 +0200
Subject: [PATCH 1/6] add tests that should pass:
clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
---
.../constexpr-x86-intrinsics-pd2ps.cpp | 120 ++++++++++++++++++
1 file changed, 120 insertions(+)
create mode 100644 clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
new file mode 100644
index 0000000000000..a082b23bfae03
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -0,0 +1,120 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+// HACK: Prevent immintrin.h from pulling in standard library headers
+// that don't exist in this test environment.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+namespace ExactFinite {
+constexpr __m128d d2 = { -1.0, +2.0 };
+constexpr __m128 r128 = _mm_cvtpd_ps(d2);
+static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
+static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
+
+constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
+static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
+static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
+
+constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
+static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
+
+constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
+static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
+static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
+
+constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
+static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
+static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
+
+constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
+static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
+
+constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
+static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
+static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
+
+constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
+static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
+static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
+
+constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
+static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
+static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
+
+constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
+static_assert(r512lo_mask[0] == -1.0f, "");
+static_assert(r512lo_mask[1] == +2.0f, "");
+static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
+
+constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b_ss = { -1.0, 42.0 };
+constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
+static_assert(r_ss[0] == -1.0f, "");
+static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+
+constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
+static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
+constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
+static_assert(r_ss_mask_off[0] == 9.0f, "");
+constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
+static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+}
+
+namespace InexactOrSpecialReject {
+constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
+static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
+static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
+static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128d dsub = { 1e-310, 0.0 };
+constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
+static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+
+constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
+constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
+static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}
+
+namespace MaskedSpecialCasesAllowed {
+constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
+static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+
+constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
+static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+
+constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
+constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
+static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+
+constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
+static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+
+constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
+constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
+static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+
+constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
+static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+
+constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
+static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+}
>From 30c0dc75714191e31625bb074e6e62d54aeece7f Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 22:20:48 +0200
Subject: [PATCH 2/6] added __DEFAULT_FN_ATTRS_CONSTEXPR To Headers
---
clang/lib/Headers/avx512fintrin.h | 16 ++++++++--------
clang/lib/Headers/avx512vlintrin.h | 8 ++++----
clang/lib/Headers/avxintrin.h | 4 ++--
clang/lib/Headers/emmintrin.h | 4 ++--
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..b9f1d1eecc09f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_undefined_ps(void)
{
return (__m512)__builtin_ia32_undef512();
@@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps (__m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_pslo (__m512d __A)
{
return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A)
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
return (__m512) __builtin_shufflevector (
@@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..82a06edd28ba2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf) __W,
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf)
@@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 44ef88db5cbce..f3f444083edbf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) {
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_cvtpd_ps(__m256d __a)
{
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
@@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_undefined_ps(void)
{
return (__m256)__builtin_ia32_undef256();
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index dbe5ca0379cf5..1701effedc5ce 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d __a) {
return __builtin_ia32_cvtpd2ps((__v2df)__a);
}
@@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
/// converted value from the second parameter. The upper 96 bits are copied
/// from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a,
__m128d __b) {
return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
}
>From 9f1020ecf3a706df9537b38464b61748aa0278f0 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Wed, 26 Nov 2025 22:24:54 +0200
Subject: [PATCH 3/6] added Constexpr to necessary builtins
---
clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..283a0a3e6ae0c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
}
-def undef256 : X86Builtin<"_Vector<4, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()" > {
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
}
def undef512 : X86Builtin<"_Vector<8, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
}
// FLAGS
@@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
@@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl",
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
>From d28d6d8c7cc6e816f772a78dd0d177f0248d3178 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:08:42 +0200
Subject: [PATCH 4/6] added FULL tests for pd2ps constexpr
---
.../constexpr-x86-intrinsics-pd2ps.cpp | 559 ++++++++++++++----
1 file changed, 459 insertions(+), 100 deletions(-)
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
index a082b23bfae03..4a1e9a9c5ae2c 100644
--- a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -1,120 +1,479 @@
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
-// HACK: Prevent immintrin.h from pulling in standard library headers
-// that don't exist in this test environment.
-#define __MM_MALLOC_H
-
+#define __MM_MALLOC_H
#include <immintrin.h>
-namespace ExactFinite {
-constexpr __m128d d2 = { -1.0, +2.0 };
-constexpr __m128 r128 = _mm_cvtpd_ps(d2);
-static_assert(r128[0] == -1.0f && r128[1] == +2.0f, "");
-static_assert(r128[2] == 0.0f && r128[3] == 0.0f, "");
-
-constexpr __m128 src128 = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 m128_full = _mm_mask_cvtpd_ps(src128, 0x3, d2);
-static_assert(m128_full[0] == -1.0f && m128_full[1] == +2.0f, "");
-static_assert(m128_full[2] == 9.0f && m128_full[3] == 9.0f, "");
-
-constexpr __m128 m128_partial = _mm_mask_cvtpd_ps(src128, 0x1, d2);
-static_assert(m128_partial[0] == -1.0f && m128_partial[1] == 9.0f, "");
-
-constexpr __m128 m128_zero = _mm_maskz_cvtpd_ps(0x1, d2);
-static_assert(m128_zero[0] == -1.0f && m128_zero[1] == 0.0f, "");
-static_assert(m128_zero[2] == 0.0f && m128_zero[3] == 0.0f, "");
-
-constexpr __m256d d4 = { 0.0, -1.0, +2.0, +3.5 };
-constexpr __m128 r256 = _mm256_cvtpd_ps(d4);
-static_assert(r256[0] == 0.0f && r256[1] == -1.0f, "");
-static_assert(r256[2] == +2.0f && r256[3] == +3.5f, "");
-
-constexpr __m512d d8 = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
-constexpr __m256 r512 = _mm512_cvtpd_ps(d8);
-static_assert(r512[0] == -1.0f && r512[7] == +128.0f, "");
-
-constexpr __m256 src256 = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 r512_mask = _mm512_mask_cvtpd_ps(src256, 0x05, d8);
-static_assert(r512_mask[0] == -1.0f && r512_mask[2] == +4.0f, "");
-static_assert(r512_mask[1] == 9.0f && r512_mask[3] == 9.0f, "");
-
-constexpr __m256 r512_maskz = _mm512_maskz_cvtpd_ps(0x81, d8);
-static_assert(r512_maskz[0] == -1.0f && r512_maskz[7] == +128.0f, "");
-static_assert(r512_maskz[1] == 0.0f && r512_maskz[6] == 0.0f, "");
-
-constexpr __m512 r512lo = _mm512_cvtpd_pslo(d8);
-static_assert(r512lo[0] == -1.0f && r512lo[7] == +128.0f, "");
-static_assert(r512lo[8] == 0.0f && r512lo[15] == 0.0f, "");
-
-constexpr __m512 ws = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
- 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m512 r512lo_mask = _mm512_mask_cvtpd_pslo(ws, 0x3, d8);
-static_assert(r512lo_mask[0] == -1.0f, "");
-static_assert(r512lo_mask[1] == +2.0f, "");
-static_assert(r512lo_mask[2] == 9.0f && r512lo_mask[3] == 9.0f, "");
-
-constexpr __m128 src_ss = { 9.0f, 5.0f, 6.0f, 7.0f };
-constexpr __m128d b_ss = { -1.0, 42.0 };
-constexpr __m128 r_ss = _mm_cvtsd_ss(src_ss, b_ss);
-static_assert(r_ss[0] == -1.0f, "");
-static_assert(r_ss[1] == 5.0f && r_ss[3] == 7.0f, "");
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
-constexpr __m128 r_ss_mask_on = _mm_mask_cvtsd_ss(src_ss, 0x1, src_ss, b_ss);
-static_assert(r_ss_mask_on[0] == -1.0f && r_ss_mask_on[1] == 5.0f, "");
-constexpr __m128 r_ss_mask_off = _mm_mask_cvtsd_ss(src_ss, 0x0, src_ss, b_ss);
-static_assert(r_ss_mask_off[0] == 9.0f, "");
-constexpr __m128 r_ss_maskz_off = _mm_maskz_cvtsd_ss(0x0, src_ss, b_ss);
-static_assert(r_ss_maskz_off[0] == 0.0f && r_ss_maskz_off[1] == 0.0f, "");
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
+static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+static_assert(r[0] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace MaskOnSubnormal {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_sub = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_sub);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtsd_ss({9.000000e+00, 5.000000e+00, 6.000000e+00, 7.000000e+00}, 1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
}
-namespace InexactOrSpecialReject {
-constexpr __m128d inexact = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_inexact = _mm_cvtpd_ps(inexact); // both-error {{not an integral constant expression}}
-static_assert(r_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b);
+static_assert(r[0] == 0.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x0, a, b_inexact);
+static_assert(r[0] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inf = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_nan = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_maskz_cvtsd_ss(0x1, a, b_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtsd_ss(1, {1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00}, {nan, 0.000000e+00})'}}
+}
+}
-constexpr __m128d dinf = { __builtin_huge_val(), 0.0 };
-constexpr __m128 r_inf = _mm_cvtpd_ps(dinf); // both-error {{not an integral constant expression}}
-static_assert(r_inf[0] == __builtin_inff(), ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace Inexact {
+constexpr __m128d a = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128d a = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128d a = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128d a = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_cvtpd_ps({1.000000e-310, 0.000000e+00})'}}
+}
+}
-constexpr __m128d dnan = { __builtin_nan(""), 0.0 };
-constexpr __m128 r_nan = _mm_cvtpd_ps(dnan); // both-error {{not an integral constant expression}}
-static_assert(r_nan[0] != r_nan[0], ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace Partial {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inexact);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, 1.000000e+00})'}}
+}
+namespace MaskOnInf {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_mask_cvtpd_ps(src, 0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 2, {-1.000000e+00, nan})'}}
+}
+}
-constexpr __m128d dsub = { 1e-310, 0.0 };
-constexpr __m128 r_sub = _mm_cvtpd_ps(dsub); // both-error {{not an integral constant expression}}
-static_assert(r_sub[0] == 0.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m128d a = { -1.0, +2.0 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+static_assert(r[2] == 0.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128d a_inexact = { -1.0, 1.0000000000000002 };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x1, a_inexact);
+static_assert(r[0] == -1.0f && r[1] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m128d a_inf = { -1.0, __builtin_huge_val() };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, INF})'}}
+}
+namespace MaskOnNaN {
+constexpr __m128d a_nan = { -1.0, __builtin_nan("") };
+constexpr __m128 r = _mm_maskz_cvtpd_ps(0x2, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512vlintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm_maskz_cvtpd_ps(2, {-1.000000e+00, nan})'}}
+}
+}
-constexpr __m128 src_ss2 = { 0.0f, 1.0f, 2.0f, 3.0f };
-constexpr __m128d inexact_sd = { 1.0000000000000002, 0.0 };
-constexpr __m128 r_ss_inexact = _mm_cvtsd_ss(src_ss2, inexact_sd); // both-error {{not an integral constant expression}}
-static_assert(r_ss_inexact[0] == 1.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm256_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f, "");
+static_assert(r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace Inexact {
+constexpr __m256d a = { 1.0000000000000002, 0.0, 0.0, 0.0 };
+constexpr __m128 r = _mm256_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm256_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
}
-namespace MaskedSpecialCasesAllowed {
-constexpr __m128 src128a = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128d d2_inexact = { -1.0, 1.0000000000000002 };
-constexpr __m128 ok128 = _mm_mask_cvtpd_ps(src128a, 0x1, d2_inexact);
-static_assert(ok128[0] == -1.0f && ok128[1] == 9.0f, "");
+namespace Test_mm256_mask_cvtpd_ps {
+namespace OK {
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0xF, a);
+static_assert(r[0] == 0.0f && r[1] == -1.0f && r[2] == +2.0f && r[3] == +3.5f, "");
+}
+namespace MaskOffInf {
+// Note: the 256-bit masked intrinsics convert every lane with _mm256_cvtpd_ps and only then
+// blend via selectps, so even masked-off Inf/NaN lanes are rejected (implementation limitation).
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: the 256-bit masked intrinsics convert every lane with _mm256_cvtpd_ps and only then
+// blend via selectps, so even masked-off Inf/NaN lanes are rejected (implementation limitation).
+constexpr __m128 src = { 9.0f, 9.0f, 9.0f, 9.0f };
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_mask_cvtpd_ps(src, 0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
-constexpr __m128 ok128z = _mm_maskz_cvtpd_ps(0x1, d2_inexact);
-static_assert(ok128z[0] == -1.0f && ok128z[1] == 0.0f, "");
+namespace Test_mm256_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m256d a = { 0.0, -1.0, +2.0, +3.5 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x5, a);
+static_assert(r[0] == 0.0f && r[1] == 0.0f && r[2] == +2.0f && r[3] == 0.0f, "");
+}
+namespace MaskOffInf {
+// Note: the 256-bit masked intrinsics convert every lane with _mm256_cvtpd_ps and only then
+// blend via selectps, so even masked-off Inf/NaN lanes are rejected (implementation limitation).
+constexpr __m256d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0 };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x3, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(3, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00})'}}
+}
+namespace MaskOffNaN {
+// Note: the 256-bit masked intrinsics convert every lane with _mm256_cvtpd_ps and only then
+// blend via selectps, so even masked-off Inf/NaN lanes are rejected (implementation limitation).
+constexpr __m256d a_nan = { -1.0, +2.0, +4.0, __builtin_nan("") };
+constexpr __m128 r = _mm256_maskz_cvtpd_ps(0x7, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avxintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512vlintrin.h:* {{in call to '_mm256_cvtpd_ps({-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+// expected-note@-4 {{in call to '_mm256_maskz_cvtpd_ps(7, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan})'}}
+}
+}
-constexpr __m256d d4_inexact = { 0.0, 1.0000000000000002, 2.0, 3.0 };
-constexpr __m128 src_m = { 9.0f, 9.0f, 9.0f, 9.0f };
-constexpr __m128 ok256m = _mm256_mask_cvtpd_ps(src_m, 0b0101, d4_inexact);
-static_assert(ok256m[0] == 0.0f && ok256m[1] == 9.0f && ok256m[2] == 2.0f && ok256m[3] == 9.0f, "");
+namespace Test_mm512_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+}
+namespace Inexact {
+constexpr __m512d a = { 1.0000000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+constexpr __m256 r = _mm512_cvtpd_ps(a);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note@-3 {{in call to '_mm512_cvtpd_ps({1.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00})'}}
+}
+}
-constexpr __m128 ok256z = _mm256_maskz_cvtpd_ps(0b0101, d4_inexact);
-static_assert(ok256z[0] == 0.0f && ok256z[1] == 0.0f && ok256z[2] == 2.0f && ok256z[3] == 0.0f, "");
+namespace Test_mm512_mask_cvtpd_ps {
+namespace OK {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x05, a);
+static_assert(r[0] == -1.0f && r[2] == +4.0f, "");
+static_assert(r[1] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 9.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m256 src = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_mask_cvtpd_ps(src, 0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
-constexpr __m512d d8_inexact = { -1.0, 2.0, 4.0, 8.0, 16.0, 1.0000000000000002, 64.0, 128.0 };
-constexpr __m256 src256b = { 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
-constexpr __m256 ok512m = _mm512_mask_cvtpd_ps(src256b, 0b110111, d8_inexact);
-static_assert(ok512m[0] == -1.0f && ok512m[5] == 9.0f && ok512m[7] == 128.0f, "");
+namespace Test_mm512_maskz_cvtpd_ps {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x81, a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[1] == 0.0f && r[6] == 0.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m512d a_inexact = { -1.0, +2.0, +4.0, +8.0, +16.0, 1.0000000000000002, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0b11011111, a_inexact);
+static_assert(r[0] == -1.0f && r[5] == 0.0f && r[6] == 64.0f && r[7] == 128.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_huge_val(), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_inf);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, +8.0, +16.0, __builtin_nan(""), +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x1F, a_nan);
+static_assert(r[0] == -1.0f && r[4] == 16.0f && r[5] == 0.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512d a_inf = { -1.0, +2.0, +4.0, __builtin_huge_val(), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, INF, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m256 r = _mm512_maskz_cvtpd_ps(0x08, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@-3 {{in call to '_mm512_maskz_cvtpd_ps(8, {-1.000000e+00, 2.000000e+00, 4.000000e+00, nan, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+}
-constexpr __m256 ok512z = _mm512_maskz_cvtpd_ps(0b110111, d8_inexact);
-static_assert(ok512z[5] == 0.0f && ok512z[0] == -1.0f && ok512z[7] == 128.0f, "");
+namespace Test_mm512_cvtpd_pslo {
+namespace OK {
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_cvtpd_pslo(a);
+static_assert(r[0] == -1.0f && r[7] == +128.0f, "");
+static_assert(r[8] == 0.0f && r[15] == 0.0f, "");
+}
+}
-constexpr __m128 bad128 = _mm_mask_cvtpd_ps(src128a, 0x2, d2_inexact); // both-error {{not an integral constant expression}}
-static_assert(bad128[1] == 9.0f, ""); // both-note {{subexpression not valid in a constant expression}}
+namespace Test_mm512_mask_cvtpd_pslo {
+namespace OK {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a = { -1.0, +2.0, +4.0, +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a);
+static_assert(r[0] == -1.0f && r[1] == +2.0f, "");
+static_assert(r[2] == 9.0f && r[3] == 9.0f, "");
+}
+namespace MaskOffInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x3, a_inf);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 9.0f, "");
+}
+namespace MaskOffNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, +4.0, __builtin_nan(""), +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x7, a_nan);
+static_assert(r[0] == -1.0f && r[1] == +2.0f && r[2] == 4.0f && r[3] == 9.0f, "");
+}
+namespace MaskOnInf {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_inf = { -1.0, +2.0, __builtin_huge_val(), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_inf);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, INF, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
+namespace MaskOnNaN {
+constexpr __m512 src = (__m512){ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,
+ 9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f,9.0f };
+constexpr __m512d a_nan = { -1.0, +2.0, __builtin_nan(""), +8.0, +16.0, +32.0, +64.0, +128.0 };
+constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
+// expected-error@-1 {{must be initialized by a constant expression}}
+// expected-note@avx512fintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note@avx512fintrin.h:* {{in call to '_mm512_mask_cvtpd_ps({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+// expected-note@-4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
+}
}
>From 4a2f59bee574efec48ac87e74dae356dc72fb2ae Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:09:57 +0200
Subject: [PATCH 5/6] fully implemented features in ExprConstant visiting logic
---
clang/lib/AST/ExprConstant.cpp | 143 +++++++++++++++++++++++++++++++++
1 file changed, 143 insertions(+)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..065d5c2e33a9c 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12165,7 +12165,37 @@ static bool evalShuffleGeneric(
Out = APValue(ResultElements.data(), ResultElements.size());
return true;
}
+static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
+ APFloat OrigVal, APValue &Result) {
+ if (OrigVal.isInfinity()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
+ return false;
+ }
+ if (OrigVal.isNaN()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1;
+ return false;
+ }
+
+ APFloat Val = OrigVal;
+ bool LosesInfo = false;
+ APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
+ APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+
+ if(LosesInfo || Val.isDenormal()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
+ return false;
+ }
+
+ if(Status != APFloat::opOK) {
+ Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
+ return false;
+ }
+
+ Result = APValue(Val);
+ return true;
+}
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12878,6 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case X86::BI__builtin_ia32_cvtsd2ss: {
+ APValue VecA, VecB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB))
+ return false;
+
+ SmallVector<APValue, 4> Elements;
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+ APValue VecA, VecB, VecSrc, MaskValue;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+ return false;
+
+ unsigned Mask = MaskValue.getInt().getZExtValue();
+ SmallVector<APValue, 4> Elements;
+
+
+ if (Mask & 1) {
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
+ return false;
+ Elements.push_back(ResultVal);
+ } else {
+ Elements.push_back(VecSrc.getVectorElt(0));
+ }
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+
+
+ const auto BuiltinID = E->getBuiltinCallee();
+ bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+ BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+
+ APValue InputValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+ return false;
+
+ APValue MergeValue;
+ unsigned Mask = 0xFFFFFFFF;
+ bool NeedsMerge = false;
+ if (IsMasked) {
+ APValue MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+ return false;
+ Mask = MaskValue.getInt().getZExtValue();
+ auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (!((Mask >> I) & 1)) {
+ NeedsMerge = true;
+ break;
+ }
+ }
+ if (NeedsMerge) {
+ if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+ return false;
+ }
+ }
+
+ unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+ unsigned NumEltsInput = InputValue.getVectorLength();
+ SmallVector<APValue, 8> Elements;
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (IsMasked && !((Mask >> I) & 1)) {
+ if (!NeedsMerge) {
+ return false;
+ }
+ Elements.push_back(MergeValue.getVectorElt(I));
+ continue;
+ }
+
+ if (I >= NumEltsInput) {
+ Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+ continue;
+ }
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+ }
+ return Success(Elements, E);
+ }
+
+
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
>From 0fb3292fe860e30de61d2df3a90912f27f04f143 Mon Sep 17 00:00:00 2001
From: Hamza Hassanain <hamzahassanain067 at gmail.com>
Date: Sat, 29 Nov 2025 11:21:17 +0200
Subject: [PATCH 6/6] Ran the git clang-format command
---
clang/lib/AST/ExprConstant.cpp | 192 ++++++++++++++++-----------------
1 file changed, 96 insertions(+), 96 deletions(-)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 065d5c2e33a9c..6f512dd538e7d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12169,7 +12169,7 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
APFloat OrigVal, APValue &Result) {
if (OrigVal.isInfinity()) {
- Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
return false;
}
if (OrigVal.isNaN()) {
@@ -12177,18 +12177,17 @@ static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
return false;
}
- APFloat Val = OrigVal;
+ APFloat Val = OrigVal;
bool LosesInfo = false;
- APFloat::opStatus Status = Val.convert(APFloat::IEEEsingle(),
- APFloat::rmNearestTiesToEven,
- &LosesInfo);
+ APFloat::opStatus Status = Val.convert(
+ APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
- if(LosesInfo || Val.isDenormal()) {
+ if (LosesInfo || Val.isDenormal()) {
Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
return false;
}
- if(Status != APFloat::opOK) {
+ if (Status != APFloat::opOK) {
Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -12909,118 +12908,119 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
- case X86::BI__builtin_ia32_cvtsd2ss: {
- APValue VecA, VecB;
- if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
- !EvaluateAsRValue(Info, E->getArg(1), VecB))
- return false;
+ case X86::BI__builtin_ia32_cvtsd2ss: {
+ APValue VecA, VecB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB))
+ return false;
- SmallVector<APValue, 4> Elements;
+ SmallVector<APValue, 4> Elements;
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
- return false;
-
- Elements.push_back(ResultVal);
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
- unsigned NumEltsA = VecA.getVectorLength();
- for (unsigned I = 1; I < NumEltsA; ++I) {
- Elements.push_back(VecA.getVectorElt(I));
- }
+ Elements.push_back(ResultVal);
- return Success(Elements, E);
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
}
- case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
- APValue VecA, VecB, VecSrc, MaskValue;
-
- if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
- !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
- !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
- !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
- return false;
- unsigned Mask = MaskValue.getInt().getZExtValue();
- SmallVector<APValue, 4> Elements;
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+ APValue VecA, VecB, VecSrc, MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+ return false;
- if (Mask & 1) {
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(), ResultVal))
- return false;
- Elements.push_back(ResultVal);
- } else {
- Elements.push_back(VecSrc.getVectorElt(0));
- }
+ unsigned Mask = MaskValue.getInt().getZExtValue();
+ SmallVector<APValue, 4> Elements;
- unsigned NumEltsA = VecA.getVectorLength();
- for (unsigned I = 1; I < NumEltsA; ++I) {
- Elements.push_back(VecA.getVectorElt(I));
- }
+ if (Mask & 1) {
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
+ Elements.push_back(ResultVal);
+ } else {
+ Elements.push_back(VecSrc.getVectorElt(0));
+ }
- return Success(Elements, E);
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
}
- case X86::BI__builtin_ia32_cvtpd2ps:
- case X86::BI__builtin_ia32_cvtpd2ps256:
- case X86::BI__builtin_ia32_cvtpd2ps_mask:
- case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
- const auto BuiltinID = E->getBuiltinCallee();
- bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
- BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+ const auto BuiltinID = E->getBuiltinCallee();
+ bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+ BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
- APValue InputValue;
- if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
- return false;
-
- APValue MergeValue;
- unsigned Mask = 0xFFFFFFFF;
- bool NeedsMerge = false;
- if (IsMasked) {
- APValue MaskValue;
- if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
- return false;
- Mask = MaskValue.getInt().getZExtValue();
- auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
- for (unsigned I = 0; I < NumEltsResult; ++I) {
- if (!((Mask >> I) & 1)) {
- NeedsMerge = true;
- break;
- }
- }
- if (NeedsMerge) {
- if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
- return false;
- }
- }
+ APValue InputValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+ return false;
- unsigned NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
- unsigned NumEltsInput = InputValue.getVectorLength();
- SmallVector<APValue, 8> Elements;
+ APValue MergeValue;
+ unsigned Mask = 0xFFFFFFFF;
+ bool NeedsMerge = false;
+ if (IsMasked) {
+ APValue MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+ return false;
+ Mask = MaskValue.getInt().getZExtValue();
+ auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
for (unsigned I = 0; I < NumEltsResult; ++I) {
- if (IsMasked && !((Mask >> I) & 1)) {
- if (!NeedsMerge) {
- return false;
- }
- Elements.push_back(MergeValue.getVectorElt(I));
- continue;
+ if (!((Mask >> I) & 1)) {
+ NeedsMerge = true;
+ break;
}
+ }
+ if (NeedsMerge) {
+ if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+ return false;
+ }
+ }
- if (I >= NumEltsInput) {
- Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
- continue;
+ unsigned NumEltsResult =
+ E->getType()->getAs<VectorType>()->getNumElements();
+ unsigned NumEltsInput = InputValue.getVectorLength();
+ SmallVector<APValue, 8> Elements;
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (IsMasked && !((Mask >> I) & 1)) {
+ if (!NeedsMerge) {
+ return false;
}
+ Elements.push_back(MergeValue.getVectorElt(I));
+ continue;
+ }
- APValue ResultVal;
- if (!ConvertDoubleToFloatStrict(Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
- return false;
-
- Elements.push_back(ResultVal);
+ if (I >= NumEltsInput) {
+ Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+ continue;
}
- return Success(Elements, E);
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(
+ Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
}
+ return Success(Elements, E);
+ }
-
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
More information about the cfe-commits
mailing list