[clang] [X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (PR #169980)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 29 01:24:50 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Hamza Hassanain (HamzaHassanain)
<details>
<summary>Changes</summary>
This patch implements constant evaluation support for the following X86 intrinsics:
- _mm_cvtpd_ps, _mm256_cvtpd_ps (Packed Double to Float)
- _mm_cvtsd_ss (Scalar Double to Float merge)
- Masked variants of the above
It implements the strict "Exact and Finite" rule: conversions that are
inexact, infinite, or NaN are rejected in constexpr contexts.
Fixes #<!-- -->169370
---
Patch is 42.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169980.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+8-8)
- (modified) clang/lib/AST/ExprConstant.cpp (+143)
- (modified) clang/lib/Headers/avx512fintrin.h (+8-8)
- (modified) clang/lib/Headers/avx512vlintrin.h (+4-4)
- (modified) clang/lib/Headers/avxintrin.h (+2-2)
- (modified) clang/lib/Headers/emmintrin.h (+2-2)
- (added) clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp (+479)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 4aa3d51931980..283a0a3e6ae0c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -24,12 +24,12 @@ def undef128 : X86Builtin<"_Vector<2, double>()"> {
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>];
}
-def undef256 : X86Builtin<"_Vector<4, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<256>];
+def undef256 : X86Builtin<"_Vector<4, double>()" > {
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<256>];
}
def undef512 : X86Builtin<"_Vector<8, double>()"> {
- let Attributes = [Const, NoThrow, RequiredVectorWidth<512>];
+ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<512>];
}
// FLAGS
@@ -168,7 +168,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -468,7 +468,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -1009,7 +1009,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
def cvtps2dq512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, float>, _Vector<16, int>, unsigned short, _Constant int)">;
@@ -1457,7 +1457,7 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
@@ -3301,7 +3301,7 @@ let Features = "avx512bw,avx512vl",
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 3b91678f7d400..6f512dd538e7d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12165,7 +12165,36 @@ static bool evalShuffleGeneric(
Out = APValue(ResultElements.data(), ResultElements.size());
return true;
}
+static bool ConvertDoubleToFloatStrict(EvalInfo &Info, const Expr *E,
+ APFloat OrigVal, APValue &Result) {
+ if (OrigVal.isInfinity()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 0;
+ return false;
+ }
+ if (OrigVal.isNaN()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic) << 1;
+ return false;
+ }
+
+ APFloat Val = OrigVal;
+ bool LosesInfo = false;
+ APFloat::opStatus Status = Val.convert(
+ APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+
+ if (LosesInfo || Val.isDenormal()) {
+ Info.CCEDiag(E, diag::note_constexpr_float_arithmetic_strict);
+ return false;
+ }
+
+ if (Status != APFloat::opOK) {
+ Info.CCEDiag(E, diag::note_invalid_subexpr_in_const_expr);
+ return false;
+ }
+
+ Result = APValue(Val);
+ return true;
+}
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -12878,6 +12907,120 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case X86::BI__builtin_ia32_cvtsd2ss: {
+ APValue VecA, VecB;
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB))
+ return false;
+
+ SmallVector<APValue, 4> Elements;
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtsd2ss_round_mask: {
+ APValue VecA, VecB, VecSrc, MaskValue;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateAsRValue(Info, E->getArg(3), MaskValue))
+ return false;
+
+ unsigned Mask = MaskValue.getInt().getZExtValue();
+ SmallVector<APValue, 4> Elements;
+
+ if (Mask & 1) {
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(Info, E, VecB.getVectorElt(0).getFloat(),
+ ResultVal))
+ return false;
+ Elements.push_back(ResultVal);
+ } else {
+ Elements.push_back(VecSrc.getVectorElt(0));
+ }
+
+ unsigned NumEltsA = VecA.getVectorLength();
+ for (unsigned I = 1; I < NumEltsA; ++I) {
+ Elements.push_back(VecA.getVectorElt(I));
+ }
+
+ return Success(Elements, E);
+ }
+ case X86::BI__builtin_ia32_cvtpd2ps:
+ case X86::BI__builtin_ia32_cvtpd2ps256:
+ case X86::BI__builtin_ia32_cvtpd2ps_mask:
+ case X86::BI__builtin_ia32_cvtpd2ps512_mask: {
+
+ const auto BuiltinID = E->getBuiltinCallee();
+ bool IsMasked = (BuiltinID == X86::BI__builtin_ia32_cvtpd2ps_mask ||
+ BuiltinID == X86::BI__builtin_ia32_cvtpd2ps512_mask);
+
+ APValue InputValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), InputValue))
+ return false;
+
+ APValue MergeValue;
+ unsigned Mask = 0xFFFFFFFF;
+ bool NeedsMerge = false;
+ if (IsMasked) {
+ APValue MaskValue;
+ if (!EvaluateAsRValue(Info, E->getArg(2), MaskValue))
+ return false;
+ Mask = MaskValue.getInt().getZExtValue();
+ auto NumEltsResult = E->getType()->getAs<VectorType>()->getNumElements();
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (!((Mask >> I) & 1)) {
+ NeedsMerge = true;
+ break;
+ }
+ }
+ if (NeedsMerge) {
+ if (!EvaluateAsRValue(Info, E->getArg(1), MergeValue))
+ return false;
+ }
+ }
+
+ unsigned NumEltsResult =
+ E->getType()->getAs<VectorType>()->getNumElements();
+ unsigned NumEltsInput = InputValue.getVectorLength();
+ SmallVector<APValue, 8> Elements;
+ for (unsigned I = 0; I < NumEltsResult; ++I) {
+ if (IsMasked && !((Mask >> I) & 1)) {
+ if (!NeedsMerge) {
+ return false;
+ }
+ Elements.push_back(MergeValue.getVectorElt(I));
+ continue;
+ }
+
+ if (I >= NumEltsInput) {
+ Elements.push_back(APValue(APFloat::getZero(APFloat::IEEEsingle())));
+ continue;
+ }
+
+ APValue ResultVal;
+ if (!ConvertDoubleToFloatStrict(
+ Info, E, InputValue.getVectorElt(I).getFloat(), ResultVal))
+ return false;
+
+ Elements.push_back(ResultVal);
+ }
+ return Success(Elements, E);
+ }
+
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512: {
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e1de56069870b..b9f1d1eecc09f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -207,7 +207,7 @@ _mm512_undefined(void)
return (__m512)__builtin_ia32_undef512();
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_undefined_ps(void)
{
return (__m512)__builtin_ia32_undef512();
@@ -3489,7 +3489,7 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A) {
(__v8sf)_mm256_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_ps (__m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3498,7 +3498,7 @@ _mm512_cvtpd_ps (__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3507,7 +3507,7 @@ _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS512
+static __inline__ __m256 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3516,7 +3516,7 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_cvtpd_pslo (__m512d __A)
{
return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3524,7 +3524,7 @@ _mm512_cvtpd_pslo (__m512d __A)
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
return (__m512) __builtin_shufflevector (
@@ -8672,7 +8672,7 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8681,7 +8681,7 @@ _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
(__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 99c057030a4cc..82a06edd28ba2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1791,14 +1791,14 @@ _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf) __W,
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
(__v4sf)
@@ -1806,14 +1806,14 @@ _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
(__mmask8) __U);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS256
+static __inline__ __m128 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm256_cvtpd_ps(__A),
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 44ef88db5cbce..f3f444083edbf 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2190,7 +2190,7 @@ _mm256_cvtepi32_ps(__m256i __a) {
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 128-bit vector of [4 x float] containing the converted values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_cvtpd_ps(__m256d __a)
{
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
@@ -3610,7 +3610,7 @@ _mm256_undefined_pd(void)
/// This intrinsic has no corresponding instruction.
///
/// \returns A 256-bit vector of [8 x float] containing undefined values.
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_undefined_ps(void)
{
return (__m256)__builtin_ia32_undef256();
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index dbe5ca0379cf5..1701effedc5ce 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1279,7 +1279,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
/// converted values. The upper 64 bits are set to zero.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtpd_ps(__m128d __a) {
return __builtin_ia32_cvtpd2ps((__v2df)__a);
}
@@ -1384,7 +1384,7 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a) {
/// \returns A 128-bit vector of [4 x float]. The lower 32 bits contain the
/// converted value from the second parameter. The upper 96 bits are copied
/// from the upper 96 bits of the first parameter.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a,
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsd_ss(__m128 __a,
__m128d __b) {
return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
}
diff --git a/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
new file mode 100644
index 0000000000000..4a1e9a9c5ae2c
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-intrinsics-pd2ps.cpp
@@ -0,0 +1,479 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512vl -verify %s
+
+#define __MM_MALLOC_H
+#include <immintrin.h>
+
+namespace Test_mm_cvtsd_ss {
+namespace OK {
+constexpr __m128 a = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+static_assert(r[0] == -1.0f && r[1] == 5.0f && r[2] == 6.0f && r[3] == 7.0f, "");
+}
+namespace Inexact {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e+00, 0.000000e+00})'}}
+}
+namespace Inf {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_huge_val(), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at emmintrin.h:* {{floating point arithmetic produces an infinity}}
+// expected-note at -3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {INF, 0.000000e+00})'}}
+}
+namespace NaN {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { __builtin_nan(""), 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at emmintrin.h:* {{floating point arithmetic produces a NaN}}
+// expected-note at -3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {nan, 0.000000e+00})'}}
+}
+namespace Subnormal {
+constexpr __m128 a = { 0.0f, 1.0f, 2.0f, 3.0f };
+constexpr __m128d b = { 1e-310, 0.0 };
+constexpr __m128 r = _mm_cvtsd_ss(a, b);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at emmintrin.h:* {{compile time floating point arithmetic suppressed in strict evaluation modes}}
+// expected-note at -3 {{in call to '_mm_cvtsd_ss({0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00}, {1.000000e-310, 0.000000e+00})'}}
+}
+}
+
+namespace Test_mm_mask_cvtsd_ss {
+namespace OK {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b);
+static_assert(r[0] == -1.0f && r[1] == 2.0f && r[2] == 3.0f && r[3] == 4.0f, "");
+}
+namespace MaskOff {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b = { -1.0, 42.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b);
+static_assert(r[0] == 9.0f && r[1] == 2.0f, "");
+}
+namespace MaskOffInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x0, a, b_inexact);
+static_assert(r[0] == 9.0f, "");
+}
+namespace MaskOnInexact {
+constexpr __m128 src = { 9.0f, 5.0f, 6.0f, 7.0f };
+constexpr __m128 a = { 1.0f, 2.0f, 3.0f, 4.0f };
+constexpr __m128d b_inexact = { 1.0000000000000002, 0.0 };
+constexpr __m128 r = _mm_mask_cvtsd_ss(src, 0x1, a, b_inexact);
+// expected-error at -1 {{must be initialized by a constant expression}}
+// expected-note at ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169980
More information about the cfe-commits
mailing list