[clang] [X86][Clang] Add constexpr support for _mm_min_ss/_mm_max_ss/_mm_min_sd/_mm_max_sd/_mm_min_sh/_mm_max_sh intrinsics (PR #178029)
NagaChaitanya Vellanki via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 5 10:19:31 PST 2026
https://github.com/chaitanyav updated https://github.com/llvm/llvm-project/pull/178029
>From 0a4486131cdb0e59edb8618cba18e79f7004b1b5 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Mon, 26 Jan 2026 09:38:42 -0800
Subject: [PATCH 1/9] [X86][Clang] Add constexpr support for
_mm_min_ss/_mm_max_ss/_mm_min_sd/_mm_max_sd/_mm_min_sh/_mm_max_sh intrinsics
- Added boolean IsScalar argument to the helper functions in
InterpBuiltin/ExprConstant
- Made minsh_round_mask, maxsh_round_mask constexpr only for
_MM_FROUND_CUR_DIRECTION rounding mode.
- Added helper function for scalar round mask in
InterpBuiltin/ExprConstant
Resolves: #175198
---
clang/include/clang/Basic/BuiltinsX86.td | 22 ++---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 86 +++++++++++++++++++-
clang/lib/AST/ExprConstant.cpp | 83 ++++++++++++++++++-
clang/lib/Headers/avx512fp16intrin.h | 30 +++----
clang/lib/Headers/emmintrin.h | 8 +-
clang/lib/Headers/xmmintrin.h | 6 +-
clang/test/CodeGen/X86/avx512fp16-builtins.c | 15 ++++
clang/test/CodeGen/X86/sse-builtins.c | 4 +
clang/test/CodeGen/X86/sse2-builtins.c | 4 +
9 files changed, 221 insertions(+), 37 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 23eac47eb5e4c..f419613fbed26 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -82,15 +82,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}
- foreach Op = ["min", "max"] in {
- let Features = "sse" in {
- def Op#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
- }
- let Features = "sse2" in {
- def Op#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
- }
- }
-
let Features = "sse" in {
def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
@@ -160,6 +151,8 @@ let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def minps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def maxps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
+ def minss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
+ def maxss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
}
let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -180,6 +173,8 @@ let Features = "sse2", Attributes = [NoThrow] in {
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
+ def minsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
+ def maxsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
}
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
@@ -3403,6 +3398,13 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVe
def minph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Constant int)">;
}
+let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def maxsh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+ def minsh : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>)">;
+ def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+ def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
+}
+
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def minph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
def maxph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>)">;
@@ -3418,8 +3420,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
def divsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
def mulsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
def subsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
- def maxsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
- def minsh_round_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int)">;
}
let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index e47fc43ee8638..f0c8faff07552 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2510,7 +2510,8 @@ static bool interp__builtin_elementwise_fp_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<std::optional<APFloat>(
const APFloat &, const APFloat &, std::optional<APSInt> RoundingMode)>
- Fn) {
+ Fn,
+ bool IsScalar = false) {
assert((Call->getNumArgs() == 2) || (Call->getNumArgs() == 3));
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
assert(VT->getElementType()->isFloatingType());
@@ -2533,6 +2534,10 @@ static bool interp__builtin_elementwise_fp_binop(
const Pointer &Dst = S.Stk.peek<Pointer>();
for (unsigned ElemIdx = 0; ElemIdx != NumElems; ++ElemIdx) {
using T = PrimConv<PT_Float>::T;
+ if (IsScalar && ElemIdx > 0) {
+ Dst.elem<T>(ElemIdx) = APtr.elem<T>(ElemIdx);
+ continue;
+ }
APFloat ElemA = APtr.elem<T>(ElemIdx).getAPFloat();
APFloat ElemB = BPtr.elem<T>(ElemIdx).getAPFloat();
std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
@@ -2546,6 +2551,45 @@ static bool interp__builtin_elementwise_fp_binop(
return true;
}
+static bool interp__builtin_scalar_fp_round_mask_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) {
+ assert(Call->getNumArgs() == 5);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ unsigned NumElems = VT->getNumElements();
+
+ APSInt Rounding = popToAPSInt(S, Call->getArg(4));
+ APSInt MaskVal = popToAPSInt(S, Call->getArg(3));
+ const Pointer &SrcPtr = S.Stk.pop<Pointer>();
+ const Pointer &BPtr = S.Stk.pop<Pointer>();
+ const Pointer &APtr = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ // Only _MM_FROUND_CUR_DIRECTION (4) is supported.
+ if (Rounding != 4)
+ return false;
+
+ using T = PrimConv<PT_Float>::T;
+
+ if (MaskVal.getZExtValue() & 1) {
+ APFloat ElemA = APtr.elem<T>(0).getAPFloat();
+ APFloat ElemB = BPtr.elem<T>(0).getAPFloat();
+ if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() ||
+ ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal())
+ return false;
+ Dst.elem<T>(0) = static_cast<T>(Fn(ElemA, ElemB));
+ } else {
+ Dst.elem<T>(0) = SrcPtr.elem<T>(0);
+ }
+
+ for (unsigned I = 1; I < NumElems; ++I)
+ Dst.elem<T>(I) = APtr.elem<T>(I);
+
+ Dst.initializeAllElements();
+
+ return true;
+}
+
static bool interp__builtin_elementwise_int_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
@@ -5879,6 +5923,46 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::maximum(A, B);
});
+ case clang::X86::BI__builtin_ia32_minss:
+ case clang::X86::BI__builtin_ia32_minsd:
+ case clang::X86::BI__builtin_ia32_minsh:
+ return interp__builtin_elementwise_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_maxss:
+ case clang::X86::BI__builtin_ia32_maxsd:
+ case clang::X86::BI__builtin_ia32_maxsh:
+ return interp__builtin_elementwise_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::maximum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ return interp__builtin_scalar_fp_round_mask_binop(
+ S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ });
+
+ case clang::X86::BI__builtin_ia32_maxsh_round_mask:
+ return interp__builtin_scalar_fp_round_mask_binop(
+ S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::maximum(A, B);
+ });
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ff433ed729a28..ecc4e4421cfe1 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12164,7 +12164,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
auto EvaluateFpBinOpExpr =
[&](llvm::function_ref<std::optional<APFloat>(
const APFloat &, const APFloat &, std::optional<APSInt>)>
- Fn) {
+ Fn,
+ bool IsScalar = false) {
assert(E->getNumArgs() == 2 || E->getNumArgs() == 3);
APValue A, B;
if (!EvaluateAsRValue(Info, E->getArg(0), A) ||
@@ -12187,6 +12188,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
ResultElements.reserve(NumElems);
for (unsigned EltNum = 0; EltNum < NumElems; ++EltNum) {
+ if (IsScalar && EltNum > 0) {
+ ResultElements.push_back(A.getVectorElt(EltNum));
+ continue;
+ }
const APFloat &EltA = A.getVectorElt(EltNum).getFloat();
const APFloat &EltB = B.getVectorElt(EltNum).getFloat();
std::optional<APFloat> Result = Fn(EltA, EltB, RoundingMode);
@@ -12197,6 +12202,44 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), NumElems), E);
};
+ auto EvaluateScalarFpRoundMaskBinOp =
+ [&](llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) {
+ assert(E->getNumArgs() == 5);
+ APValue VecA, VecB, VecSrc;
+ APSInt MaskVal, Rounding;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
+ !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
+ !EvaluateAsRValue(Info, E->getArg(2), VecSrc) ||
+ !EvaluateInteger(E->getArg(3), MaskVal, Info) ||
+ !EvaluateInteger(E->getArg(4), Rounding, Info))
+ return false;
+
+ // Only _MM_FROUND_CUR_DIRECTION (4) is supported.
+ if (Rounding != 4)
+ return false;
+
+ unsigned NumElems = VecA.getVectorLength();
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumElems);
+
+ if (MaskVal.getZExtValue() & 1) {
+ const APFloat &EltA = VecA.getVectorElt(0).getFloat();
+ const APFloat &EltB = VecB.getVectorElt(0).getFloat();
+ if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() ||
+ EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal())
+ return false;
+ ResultElements.push_back(APValue(Fn(EltA, EltB)));
+ } else {
+ ResultElements.push_back(VecSrc.getVectorElt(0));
+ }
+
+ for (unsigned I = 1; I < NumElems; ++I)
+ ResultElements.push_back(VecA.getVectorElt(I));
+
+ return Success(APValue(ResultElements.data(), NumElems), E);
+ };
+
auto EvalSelectScalar = [&](unsigned Len) -> bool {
APSInt Mask;
APValue AVal, WVal;
@@ -14300,6 +14343,44 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return llvm::maximum(A, B);
});
+ case clang::X86::BI__builtin_ia32_minss:
+ case clang::X86::BI__builtin_ia32_minsd:
+ case clang::X86::BI__builtin_ia32_minsh:
+ return EvaluateFpBinOpExpr(
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_maxss:
+ case clang::X86::BI__builtin_ia32_maxsd:
+ case clang::X86::BI__builtin_ia32_maxsh:
+ return EvaluateFpBinOpExpr(
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::maximum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ return EvaluateScalarFpRoundMaskBinOp(
+ [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ });
+
+ case clang::X86::BI__builtin_ia32_maxsh_round_mask:
+ return EvaluateScalarFpRoundMaskBinOp(
+ [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::maximum(A, B);
+ });
+
case clang::X86::BI__builtin_ia32_vcvtps2ph:
case clang::X86::BI__builtin_ia32_vcvtps2ph256: {
APValue SrcVec;
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 9a1d1930f66b6..4268104c3b619 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -720,25 +720,22 @@ _mm_maskz_div_sh(__mmask8 __U, __m128h __A, __m128h __B) {
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_min_sh(__m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_minsh_round_mask(
(__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
(__v8hf)__W, (__mmask8)__U,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_minsh_round_mask(
(__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
_MM_FROUND_CUR_DIRECTION);
@@ -759,25 +756,22 @@ static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
- __m128h __B) {
+static __inline__ __m128h
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_max_sh(__m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_maxsh_round_mask(
(__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W,
- __mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
(__v8hf)__W, (__mmask8)__U,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
- __m128h __A,
- __m128h __B) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B) {
return (__m128h)__builtin_ia32_maxsh_round_mask(
(__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
_MM_FROUND_CUR_DIRECTION);
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 61b35e97314fd..bbf366133c68a 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -279,8 +279,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) {
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
/// minimum value between both operands. The upper 64 bits are copied from
/// the upper 64 bits of the first source operand.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
}
@@ -325,8 +325,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_pd(__m128d __a,
/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
/// maximum value between both operands. The upper 64 bits are copied from
/// the upper 64 bits of the first source operand.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index ab0f0c1690759..efc0e6ce47e7d 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -341,7 +341,8 @@ _mm_rsqrt_ps(__m128 __a)
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
/// minimum value between both operands. The upper 96 bits are copied from
/// the upper 96 bits of the first source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
}
@@ -384,7 +385,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_min_ps(__m128 __a,
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
/// maximum value between both operands. The upper 96 bits are copied from
/// the upper 96 bits of the first source operand.
-static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_max_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
}
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index dbd24d0899c60..840ada8f30bcd 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -1037,17 +1037,24 @@ __m128h test_mm_mask_min_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK: @llvm.x86.avx512fp16.mask.min.sh.round
return _mm_mask_min_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_min_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_mask_min_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),1.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+
__m128h test_mm_maskz_min_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_min_sh
// CHECK: @llvm.x86.avx512fp16.mask.min.sh.round
return _mm_maskz_min_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_min_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_maskz_min_sh((__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),0.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
__m128h test_mm_min_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_min_sh
// CHECK: @llvm.x86.avx512fp16.mask.min.sh.round
return _mm_min_sh(__A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_min_sh((__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_min_sh((__m128h)(__v8hf){+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f},(__m128h)(__v8hf){-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f}),-0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f));
__m128h test_mm_max_round_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_max_round_sh
@@ -1069,17 +1076,25 @@ __m128h test_mm_mask_max_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
// CHECK: @llvm.x86.avx512fp16.mask.max.sh.round
return _mm_mask_max_sh(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_mask_max_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_mask_max_sh((__m128h)(__v8hf){1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f},(__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),1.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+
__m128h test_mm_maskz_max_sh(__mmask8 __U, __m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_maskz_max_sh
// CHECK: @llvm.x86.avx512fp16.mask.max.sh.round
return _mm_maskz_max_sh(__U, __A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_maskz_max_sh((__mmask8)0x01,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_maskz_max_sh((__mmask8)0x00,(__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),0.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
__m128h test_mm_max_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_max_sh
// CHECK: @llvm.x86.avx512fp16.mask.max.sh.round
return _mm_max_sh(__A, __B);
}
+TEST_CONSTEXPR(match_m128h(_mm_max_sh((__m128h)(__v8hf){10.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f},(__m128h)(__v8hf){100.0f,200.0f,300.0f,400.0f,500.0f,600.0f,700.0f,800.0f}),100.0f,20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f));
+TEST_CONSTEXPR(match_m128h(_mm_max_sh((__m128h)(__v8hf){+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f},(__m128h)(__v8hf){-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f,-0.0f}),-0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f,+0.0f));
+
__mmask32 test_mm512_cmp_round_ph_mask(__m512h a, __m512h b) {
// CHECK-LABEL: test_mm512_cmp_round_ph_mask
// CHECK: fcmp oeq <32 x half> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index edd9f00bae2b2..87b9a99a0e058 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -524,6 +524,8 @@ __m128 test_mm_max_ss(__m128 A, __m128 B) {
// CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_max_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_max_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +4.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_max_ss((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, -0.0f));
__m128 test_mm_min_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_min_ps
@@ -539,6 +541,8 @@ __m128 test_mm_min_ss(__m128 A, __m128 B) {
// CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_min_ss(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_min_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+4.0f, +3.0f, +2.0f, +1.0f}), +1.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_min_ss((__m128){+0.0f, -0.0f, +0.0f, -0.0f}, (__m128){-0.0f, +0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, -0.0f));
__m128 test_mm_move_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_move_ss
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ab0a857b926f3..2993b8bb719d6 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -894,6 +894,8 @@ __m128d test_mm_max_sd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_max_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_max_sd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +4.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_max_sd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, -0.0));
void test_mm_mfence(void) {
// CHECK-LABEL: test_mm_mfence
@@ -931,6 +933,8 @@ __m128d test_mm_min_sd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_min_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_min_sd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +1.0}), +1.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_min_sd((__m128d){+0.0, -0.0}, (__m128d){-0.0, +0.0}), -0.0, -0.0));
__m64 test_mm_movepi64_pi64(__m128i A) {
// CHECK-LABEL: test_mm_movepi64_pi64
>From e09405168546da74ac5479b8c4e0137a09609f0f Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Tue, 27 Jan 2026 08:51:38 -0800
Subject: [PATCH 2/9] Group min/max cases together for readability
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 40 ++++++++++++------------
clang/lib/AST/ExprConstant.cpp | 38 +++++++++++-----------
2 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f0c8faff07552..cdf7108a7c48f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5902,6 +5902,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::minimum(A, B);
});
+ case clang::X86::BI__builtin_ia32_minss:
+ case clang::X86::BI__builtin_ia32_minsd:
+ case clang::X86::BI__builtin_ia32_minsh:
+ return interp__builtin_elementwise_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ return interp__builtin_scalar_fp_round_mask_binop(
+ S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ });
+
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
case clang::X86::BI__builtin_ia32_maxph128:
@@ -5923,18 +5943,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::maximum(A, B);
});
- case clang::X86::BI__builtin_ia32_minss:
- case clang::X86::BI__builtin_ia32_minsd:
- case clang::X86::BI__builtin_ia32_minsh:
- return interp__builtin_elementwise_fp_binop(
- S, OpPC, Call,
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
- if (A.isZero() && B.isZero())
- return B;
- return llvm::minimum(A, B);
- },
- /*IsScalar=*/true);
-
case clang::X86::BI__builtin_ia32_maxss:
case clang::X86::BI__builtin_ia32_maxsd:
case clang::X86::BI__builtin_ia32_maxsh:
@@ -5947,14 +5955,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
},
/*IsScalar=*/true);
- case clang::X86::BI__builtin_ia32_minsh_round_mask:
- return interp__builtin_scalar_fp_round_mask_binop(
- S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
- if (A.isZero() && B.isZero())
- return B;
- return llvm::minimum(A, B);
- });
-
case clang::X86::BI__builtin_ia32_maxsh_round_mask:
return interp__builtin_scalar_fp_round_mask_binop(
S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ecc4e4421cfe1..ec59afdb33d32 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14323,6 +14323,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return llvm::minimum(A, B);
});
+ case clang::X86::BI__builtin_ia32_minss:
+ case clang::X86::BI__builtin_ia32_minsd:
+ case clang::X86::BI__builtin_ia32_minsh:
+ return EvaluateFpBinOpExpr(
+ [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ },
+ /*IsScalar=*/true);
+
+ case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ return EvaluateScalarFpRoundMaskBinOp(
+ [](const APFloat &A, const APFloat &B) {
+ if (A.isZero() && B.isZero())
+ return B;
+ return llvm::minimum(A, B);
+ });
+
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
case clang::X86::BI__builtin_ia32_maxps256:
@@ -14343,17 +14362,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return llvm::maximum(A, B);
});
- case clang::X86::BI__builtin_ia32_minss:
- case clang::X86::BI__builtin_ia32_minsd:
- case clang::X86::BI__builtin_ia32_minsh:
- return EvaluateFpBinOpExpr(
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
- if (A.isZero() && B.isZero())
- return B;
- return llvm::minimum(A, B);
- },
- /*IsScalar=*/true);
-
case clang::X86::BI__builtin_ia32_maxss:
case clang::X86::BI__builtin_ia32_maxsd:
case clang::X86::BI__builtin_ia32_maxsh:
@@ -14365,14 +14373,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
},
/*IsScalar=*/true);
- case clang::X86::BI__builtin_ia32_minsh_round_mask:
- return EvaluateScalarFpRoundMaskBinOp(
- [](const APFloat &A, const APFloat &B) {
- if (A.isZero() && B.isZero())
- return B;
- return llvm::minimum(A, B);
- });
-
case clang::X86::BI__builtin_ia32_maxsh_round_mask:
return EvaluateScalarFpRoundMaskBinOp(
[](const APFloat &A, const APFloat &B) {
>From 20af3380403ed0e8c3bbb8ca2da66bd9b8f458aa Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Wed, 28 Jan 2026 08:55:32 -0800
Subject: [PATCH 3/9] Address code review comments
- Move RoundingMode into the callback
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 51 +++++++++++++++++-------
clang/lib/AST/ExprConstant.cpp | 42 +++++++++++++------
2 files changed, 66 insertions(+), 27 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index cdf7108a7c48f..bec94c3827f18 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2553,31 +2553,30 @@ static bool interp__builtin_elementwise_fp_binop(
static bool interp__builtin_scalar_fp_round_mask_binop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
- llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) {
+ llvm::function_ref<std::optional<APFloat>(const APFloat &, const APFloat &,
+ std::optional<APSInt>)>
+ Fn) {
assert(Call->getNumArgs() == 5);
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
unsigned NumElems = VT->getNumElements();
- APSInt Rounding = popToAPSInt(S, Call->getArg(4));
- APSInt MaskVal = popToAPSInt(S, Call->getArg(3));
+ uint64_t Rounding = popToUInt64(S, Call->getArg(4));
+ uint64_t MaskVal = popToUInt64(S, Call->getArg(3));
const Pointer &SrcPtr = S.Stk.pop<Pointer>();
const Pointer &BPtr = S.Stk.pop<Pointer>();
const Pointer &APtr = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
- // Only _MM_FROUND_CUR_DIRECTION (4) is supported.
- if (Rounding != 4)
- return false;
-
using T = PrimConv<PT_Float>::T;
- if (MaskVal.getZExtValue() & 1) {
+ if (MaskVal & 1) {
APFloat ElemA = APtr.elem<T>(0).getAPFloat();
APFloat ElemB = BPtr.elem<T>(0).getAPFloat();
- if (ElemA.isNaN() || ElemA.isInfinity() || ElemA.isDenormal() ||
- ElemB.isNaN() || ElemB.isInfinity() || ElemB.isDenormal())
+ APSInt RoundingMode(APInt(32, Rounding), /*isUnsigned=*/true);
+ std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
+ if (!Result)
return false;
- Dst.elem<T>(0) = static_cast<T>(Fn(ElemA, ElemB));
+ Dst.elem<T>(0) = static_cast<T>(*Result);
} else {
Dst.elem<T>(0) = SrcPtr.elem<T>(0);
}
@@ -5907,7 +5906,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_minsh:
return interp__builtin_elementwise_fp_binop(
S, OpPC, Call,
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt>) -> std::optional<APFloat> {
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::minimum(A, B);
@@ -5916,7 +5919,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_minsh_round_mask:
return interp__builtin_scalar_fp_round_mask_binop(
- S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
+ S, OpPC, Call,
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ if (!RoundingMode || *RoundingMode != 4)
+ return std::nullopt;
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::minimum(A, B);
@@ -5948,7 +5958,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_maxsh:
return interp__builtin_elementwise_fp_binop(
S, OpPC, Call,
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt>) -> std::optional<APFloat> {
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::maximum(A, B);
@@ -5957,7 +5971,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_maxsh_round_mask:
return interp__builtin_scalar_fp_round_mask_binop(
- S, OpPC, Call, [](const APFloat &A, const APFloat &B) {
+ S, OpPC, Call,
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ if (!RoundingMode || *RoundingMode != 4)
+ return std::nullopt;
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::maximum(A, B);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ec59afdb33d32..2cf14f77127fa 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12203,7 +12203,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
};
auto EvaluateScalarFpRoundMaskBinOp =
- [&](llvm::function_ref<APFloat(const APFloat &, const APFloat &)> Fn) {
+ [&](llvm::function_ref<std::optional<APFloat>(
+ const APFloat &, const APFloat &, std::optional<APSInt>)>
+ Fn) {
assert(E->getNumArgs() == 5);
APValue VecA, VecB, VecSrc;
APSInt MaskVal, Rounding;
@@ -12215,10 +12217,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
!EvaluateInteger(E->getArg(4), Rounding, Info))
return false;
- // Only _MM_FROUND_CUR_DIRECTION (4) is supported.
- if (Rounding != 4)
- return false;
-
unsigned NumElems = VecA.getVectorLength();
SmallVector<APValue, 8> ResultElements;
ResultElements.reserve(NumElems);
@@ -12226,10 +12224,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (MaskVal.getZExtValue() & 1) {
const APFloat &EltA = VecA.getVectorElt(0).getFloat();
const APFloat &EltB = VecB.getVectorElt(0).getFloat();
- if (EltA.isNaN() || EltA.isInfinity() || EltA.isDenormal() ||
- EltB.isNaN() || EltB.isInfinity() || EltB.isDenormal())
+ std::optional<APFloat> Result = Fn(EltA, EltB, Rounding);
+ if (!Result)
return false;
- ResultElements.push_back(APValue(Fn(EltA, EltB)));
+ ResultElements.push_back(APValue(*Result));
} else {
ResultElements.push_back(VecSrc.getVectorElt(0));
}
@@ -14327,7 +14325,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_minsd:
case clang::X86::BI__builtin_ia32_minsh:
return EvaluateFpBinOpExpr(
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt>) -> std::optional<APFloat> {
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::minimum(A, B);
@@ -14336,7 +14338,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_minsh_round_mask:
return EvaluateScalarFpRoundMaskBinOp(
- [](const APFloat &A, const APFloat &B) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ if (!RoundingMode || *RoundingMode != 4)
+ return std::nullopt;
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::minimum(A, B);
@@ -14366,7 +14374,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_maxsd:
case clang::X86::BI__builtin_ia32_maxsh:
return EvaluateFpBinOpExpr(
- [](const APFloat &A, const APFloat &B, std::optional<APSInt>) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt>) -> std::optional<APFloat> {
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::maximum(A, B);
@@ -14375,7 +14387,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_maxsh_round_mask:
return EvaluateScalarFpRoundMaskBinOp(
- [](const APFloat &A, const APFloat &B) {
+ [](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ if (!RoundingMode || *RoundingMode != 4)
+ return std::nullopt;
+ if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
+ B.isInfinity() || B.isDenormal())
+ return std::nullopt;
if (A.isZero() && B.isZero())
return B;
return llvm::maximum(A, B);
>From 2ae68f627115eab4b896aed11fbdb5aca46f509c Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Wed, 28 Jan 2026 09:48:02 -0800
Subject: [PATCH 4/9] Add tests for minsh/maxsh_round_mask
- Validate the rounding modes
- Special values NaN, Denormal, Infinity
---
.../constexpr-x86-avx512fp16-builtins.cpp | 113 ++++++++++++++++++
1 file changed, 113 insertions(+)
create mode 100644 clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp
new file mode 100644
index 0000000000000..c815f2ad063b4
--- /dev/null
+++ b/clang/test/SemaCXX/constexpr-x86-avx512fp16-builtins.cpp
@@ -0,0 +1,113 @@
+// RUN: %clang_cc1 -std=c++20 -ffreestanding -fexperimental-new-constant-interpreter -triple x86_64-unknown-unknown -target-feature +avx512fp16 -verify %s
+
+#include <immintrin.h>
+#include "../CodeGen/X86/builtin_test_helpers.h"
+
+constexpr int ROUND_CUR_DIRECTION = 4;
+constexpr int ROUND_NO_EXC = 8;
+constexpr int ROUND_CUR_DIRECTION_NO_EXC = 12;
+
+namespace Test_mm_min_sh_round_mask_invalid_rounding {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_max_sh_round_mask_invalid_rounding_8 {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_max_sh_round_mask_invalid_rounding_12 {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_round_mask_valid_rounding {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128h(result, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f));
+}
+
+namespace Test_mm_max_sh_round_mask_valid_rounding {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128h(result, 100.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f));
+}
+
+namespace Test_mm_min_sh_round_mask_nan {
+constexpr __m128h a = (__m128h)(__v8hf){__builtin_nanf16(""), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_round_mask_pos_infinity {
+constexpr __m128h a = (__m128h)(__v8hf){__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_round_mask_neg_infinity {
+constexpr __m128h a = (__m128h)(__v8hf){-__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_round_mask_denormal {
+constexpr _Float16 denormal = 0x1.0p-15f16;
+constexpr __m128h a = (__m128h)(__v8hf){denormal, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h src = (__m128h)(__v8hf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+constexpr __m128h result = (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)a, (__v8hf)b, (__v8hf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_valid {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_min_sh(a, b);
+static_assert(match_m128h(result, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f));
+}
+
+namespace Test_mm_max_sh_valid {
+constexpr __m128h a = (__m128h)(__v8hf){10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_max_sh(a, b);
+static_assert(match_m128h(result, 100.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f));
+}
+
+namespace Test_mm_min_sh_nan {
+constexpr __m128h a = (__m128h)(__v8hf){__builtin_nanf16(""), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_pos_infinity {
+constexpr __m128h a = (__m128h)(__v8hf){__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_neg_infinity {
+constexpr __m128h a = (__m128h)(__v8hf){-__builtin_inff16(), 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sh_denormal {
+constexpr _Float16 denormal = 0x1.0p-15f16;
+constexpr __m128h a = (__m128h)(__v8hf){denormal, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f};
+constexpr __m128h b = (__m128h)(__v8hf){100.0f, 200.0f, 300.0f, 400.0f, 500.0f, 600.0f, 700.0f, 800.0f};
+constexpr __m128h result = _mm_min_sh(a, b); // expected-error {{must be initialized by a constant expression}}
+}
>From 71761146951c2a323551d78d7a7f435317e04a4d Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Thu, 29 Jan 2026 08:54:19 -0800
Subject: [PATCH 5/9] Set rounding mode to 4 (_MM_FROUND_CUR_DIRECTION) by
default if not specified
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 24 ++++++++++++++++++++----
clang/lib/AST/ExprConstant.cpp | 24 ++++++++++++++++++++----
2 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index bec94c3827f18..445d3e0bbc6b7 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5907,7 +5907,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_fp_binop(
S, OpPC, Call,
[](const APFloat &A, const APFloat &B,
- std::optional<APSInt>) -> std::optional<APFloat> {
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
+ return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
return std::nullopt;
@@ -5922,7 +5927,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APFloat &A, const APFloat &B,
std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- if (!RoundingMode || *RoundingMode != 4)
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
@@ -5959,7 +5967,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_fp_binop(
S, OpPC, Call,
[](const APFloat &A, const APFloat &B,
- std::optional<APSInt>) -> std::optional<APFloat> {
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
+ return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
return std::nullopt;
@@ -5974,7 +5987,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APFloat &A, const APFloat &B,
std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- if (!RoundingMode || *RoundingMode != 4)
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 2cf14f77127fa..4889e690d3139 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14326,7 +14326,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_minsh:
return EvaluateFpBinOpExpr(
[](const APFloat &A, const APFloat &B,
- std::optional<APSInt>) -> std::optional<APFloat> {
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
+ return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
return std::nullopt;
@@ -14340,7 +14345,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateScalarFpRoundMaskBinOp(
[](const APFloat &A, const APFloat &B,
std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- if (!RoundingMode || *RoundingMode != 4)
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
@@ -14375,7 +14383,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case clang::X86::BI__builtin_ia32_maxsh:
return EvaluateFpBinOpExpr(
[](const APFloat &A, const APFloat &B,
- std::optional<APSInt>) -> std::optional<APFloat> {
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
+ return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
return std::nullopt;
@@ -14389,7 +14402,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return EvaluateScalarFpRoundMaskBinOp(
[](const APFloat &A, const APFloat &B,
std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- if (!RoundingMode || *RoundingMode != 4)
+ // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
+ // specified
+ APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
+ if (RoundingMode.value_or(DefaultMode) != 4)
return std::nullopt;
if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
B.isInfinity() || B.isDenormal())
>From 8260dbf79767ee96cdd66112c424b01a8be94529 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Thu, 29 Jan 2026 11:52:21 -0800
Subject: [PATCH 6/9] Refactor: combine minsh_round_mask and maxsh_round_mask
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 27 ++++++------------------
clang/lib/AST/ExprConstant.cpp | 27 ++++++------------------
2 files changed, 13 insertions(+), 41 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 445d3e0bbc6b7..8ebea8ca2849c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5923,10 +5923,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
/*IsScalar=*/true);
case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
+ bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
return interp__builtin_scalar_fp_round_mask_binop(
S, OpPC, Call,
- [](const APFloat &A, const APFloat &B,
- std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ [IsMin](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
// Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
// specified
APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
@@ -5937,8 +5939,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::nullopt;
if (A.isZero() && B.isZero())
return B;
- return llvm::minimum(A, B);
+ return IsMin ? llvm::minimum(A, B) : llvm::maximum(A, B);
});
+ }
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
@@ -5982,24 +5985,6 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
},
/*IsScalar=*/true);
- case clang::X86::BI__builtin_ia32_maxsh_round_mask:
- return interp__builtin_scalar_fp_round_mask_binop(
- S, OpPC, Call,
- [](const APFloat &A, const APFloat &B,
- std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
- // specified
- APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
- if (RoundingMode.value_or(DefaultMode) != 4)
- return std::nullopt;
- if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
- B.isInfinity() || B.isDenormal())
- return std::nullopt;
- if (A.isZero() && B.isZero())
- return B;
- return llvm::maximum(A, B);
- });
-
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 4889e690d3139..c9dc36b7a9a52 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14342,9 +14342,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
/*IsScalar=*/true);
case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
+ bool IsMin =
+ E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_minsh_round_mask;
return EvaluateScalarFpRoundMaskBinOp(
- [](const APFloat &A, const APFloat &B,
- std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
+ [IsMin](const APFloat &A, const APFloat &B,
+ std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
// Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
// specified
APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
@@ -14355,8 +14358,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return std::nullopt;
if (A.isZero() && B.isZero())
return B;
- return llvm::minimum(A, B);
+ return IsMin ? llvm::minimum(A, B) : llvm::maximum(A, B);
});
+ }
case clang::X86::BI__builtin_ia32_maxps:
case clang::X86::BI__builtin_ia32_maxpd:
@@ -14398,23 +14402,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
},
/*IsScalar=*/true);
- case clang::X86::BI__builtin_ia32_maxsh_round_mask:
- return EvaluateScalarFpRoundMaskBinOp(
- [](const APFloat &A, const APFloat &B,
- std::optional<APSInt> RoundingMode) -> std::optional<APFloat> {
- // Default to _MM_FROUND_CUR_DIRECTION (4) if no rounding mode
- // specified
- APSInt DefaultMode(APInt(32, 4), /*isUnsigned=*/true);
- if (RoundingMode.value_or(DefaultMode) != 4)
- return std::nullopt;
- if (A.isNaN() || A.isInfinity() || A.isDenormal() || B.isNaN() ||
- B.isInfinity() || B.isDenormal())
- return std::nullopt;
- if (A.isZero() && B.isZero())
- return B;
- return llvm::maximum(A, B);
- });
-
case clang::X86::BI__builtin_ia32_vcvtps2ph:
case clang::X86::BI__builtin_ia32_vcvtps2ph256: {
APValue SrcVec;
>From 4ebbf142954d2e4de879831f17a9db5c1bcab59c Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Fri, 30 Jan 2026 10:21:53 -0800
Subject: [PATCH 7/9] Add constexpr support for AVX512f minss_round_mask,
maxss_round_mask
---
clang/include/clang/Basic/BuiltinsX86.td | 11 ++-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 +-
clang/lib/AST/ExprConstant.cpp | 8 ++
clang/lib/Headers/avx512fintrin.h | 44 +++------
clang/test/CodeGen/X86/avx512f-builtins.c | 31 ++++--
.../constexpr-x86-avx512f-builtins.cpp | 99 +++++++++++++++++++
6 files changed, 160 insertions(+), 41 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index f419613fbed26..b2d37f6997991 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1347,19 +1347,22 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def subps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
}
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def maxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+ def minss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
+ def maxsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+ def minsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
+}
+
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def addss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
def divss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
def mulss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
def subss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
- def maxss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
- def minss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
def addsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
def divsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
def mulsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
def subsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
- def maxsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
- def minsd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>, unsigned char, _Constant int)">;
}
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8ebea8ca2849c..17836d369f22a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -5922,9 +5922,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
},
/*IsScalar=*/true);
+ case clang::X86::BI__builtin_ia32_minsd_round_mask:
+ case clang::X86::BI__builtin_ia32_minss_round_mask:
case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ case clang::X86::BI__builtin_ia32_maxsd_round_mask:
+ case clang::X86::BI__builtin_ia32_maxss_round_mask:
case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
- bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
+ bool IsMin = BuiltinID == clang::X86::BI__builtin_ia32_minsd_round_mask ||
+ BuiltinID == clang::X86::BI__builtin_ia32_minss_round_mask ||
+ BuiltinID == clang::X86::BI__builtin_ia32_minsh_round_mask;
return interp__builtin_scalar_fp_round_mask_binop(
S, OpPC, Call,
[IsMin](const APFloat &A, const APFloat &B,
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c9dc36b7a9a52..4f51e9ed207a1 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14341,9 +14341,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
},
/*IsScalar=*/true);
+ case clang::X86::BI__builtin_ia32_minsd_round_mask:
+ case clang::X86::BI__builtin_ia32_minss_round_mask:
case clang::X86::BI__builtin_ia32_minsh_round_mask:
+ case clang::X86::BI__builtin_ia32_maxsd_round_mask:
+ case clang::X86::BI__builtin_ia32_maxss_round_mask:
case clang::X86::BI__builtin_ia32_maxsh_round_mask: {
bool IsMin =
+ E->getBuiltinCallee() ==
+ clang::X86::BI__builtin_ia32_minsd_round_mask ||
+ E->getBuiltinCallee() ==
+ clang::X86::BI__builtin_ia32_minss_round_mask ||
E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_minsh_round_mask;
return EvaluateScalarFpRoundMaskBinOp(
[IsMin](const APFloat &A, const APFloat &B,
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 942ed72686740..51f8369296e53 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -986,10 +986,8 @@ _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W,
- __mmask8 __U,
- __m128 __A,
- __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
@@ -997,9 +995,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,
- __m128 __A,
- __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
@@ -1025,10 +1022,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W,
- __mmask8 __U,
- __m128d __A,
- __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
@@ -1036,9 +1031,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,
- __m128d __A,
- __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
@@ -1208,10 +1202,8 @@ _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W,
- __mmask8 __U,
- __m128 __A,
- __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
@@ -1219,9 +1211,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,
- __m128 __A,
- __m128 __B) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
@@ -1247,10 +1238,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W,
- __mmask8 __U,
- __m128d __A,
- __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
@@ -1258,9 +1247,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,
- __m128d __A,
- __m128d __B) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 9366f847b3b71..e67e21daaa329 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3965,13 +3965,17 @@ __m128 test_mm_maskz_max_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__m128 test_mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_max_ss
// CHECK: @llvm.x86.avx512.mask.max.ss.round
- return _mm_mask_max_ss(__W,__U,__A,__B);
+ return _mm_mask_max_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_max_ss((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}, 0x1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f}), 100.0f, 20.0f, 30.0f, 40.0f));
+
__m128 test_mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_max_ss
// CHECK: @llvm.x86.avx512.mask.max.ss.round
- return _mm_maskz_max_ss(__U,__A,__B);
+ return _mm_maskz_max_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_max_ss(0x1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f}), 100.0f, 20.0f, 30.0f, 40.0f));
+
__m128d test_mm_max_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_max_round_sd
// CHECK: @llvm.x86.avx512.mask.max.sd.round
@@ -3990,13 +3994,17 @@ __m128d test_mm_maskz_max_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__m128d test_mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mask_max_sd
// CHECK: @llvm.x86.avx512.mask.max.sd.round
- return _mm_mask_max_sd(__W,__U,__A,__B);
+ return _mm_mask_max_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_max_sd((__m128d)(__v2df){1.0, 2.0}, 0x1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0}), 100.0, 20.0));
+
__m128d test_mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_max_sd
// CHECK: @llvm.x86.avx512.mask.max.sd.round
- return _mm_maskz_max_sd(__U,__A,__B);
+ return _mm_maskz_max_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_max_sd(0x1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0}), 100.0, 20.0));
+
__m128 test_mm_min_round_ss(__m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_min_round_ss
// CHECK: @llvm.x86.avx512.mask.min.ss.round
@@ -4015,13 +4023,17 @@ __m128 test_mm_maskz_min_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
__m128 test_mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_mask_min_ss
// CHECK: @llvm.x86.avx512.mask.min.ss.round
- return _mm_mask_min_ss(__W,__U,__A,__B);
+ return _mm_mask_min_ss(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_mask_min_ss((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}, 0x1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f}), 10.0f, 20.0f, 30.0f, 40.0f));
+
__m128 test_mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B) {
// CHECK-LABEL: test_mm_maskz_min_ss
// CHECK: @llvm.x86.avx512.mask.min.ss.round
- return _mm_maskz_min_ss(__U,__A,__B);
+ return _mm_maskz_min_ss(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128(_mm_maskz_min_ss(0x1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f}), 10.0f, 20.0f, 30.0f, 40.0f));
+
__m128d test_mm_min_round_sd(__m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_min_round_sd
// CHECK: @llvm.x86.avx512.mask.min.sd.round
@@ -4040,13 +4052,16 @@ __m128d test_mm_maskz_min_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
__m128d test_mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_mask_min_sd
// CHECK: @llvm.x86.avx512.mask.min.sd.round
- return _mm_mask_min_sd(__W,__U,__A,__B);
+ return _mm_mask_min_sd(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_mask_min_sd((__m128d)(__v2df){1.0, 2.0}, 0x1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0}), 10.0, 20.0));
+
__m128d test_mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B) {
// CHECK-LABEL: test_mm_maskz_min_sd
// CHECK: @llvm.x86.avx512.mask.min.sd.round
- return _mm_maskz_min_sd(__U,__A,__B);
+ return _mm_maskz_min_sd(__U,__A,__B);
}
+TEST_CONSTEXPR(match_m128d(_mm_maskz_min_sd(0x1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0}), 10.0, 20.0));
__m512 test_mm512_undefined(void) {
// CHECK-LABEL: test_mm512_undefined
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
index 0d2a82cbbb83c..6c0ccb06e3008 100644
--- a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
@@ -228,3 +228,102 @@ constexpr __m512 r = _mm512_mask_cvtpd_pslo(src, 0x4, a_nan);
// expected-note at -4 {{in call to '_mm512_mask_cvtpd_pslo({9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00, 9.000000e+00}, 4, {-1.000000e+00, 2.000000e+00, nan, 8.000000e+00, 1.600000e+01, 3.200000e+01, 6.400000e+01, 1.280000e+02})'}}
}
}
+
+constexpr int ROUND_CUR_DIRECTION = 4;
+constexpr int ROUND_NO_EXC = 8;
+
+namespace Test_mm_mask_min_ss_valid {
+constexpr __m128 result = _mm_mask_min_ss((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}, 1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_mask_max_ss_valid {
+constexpr __m128 result = _mm_mask_max_ss((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}, 1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_mask_min_sd_valid {
+constexpr __m128d result = _mm_mask_min_sd((__m128d)(__v2df){1.0, 2.0}, 1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0});
+TEST_CONSTEXPR(match_m128d(result, 10.0, 20.0));
+}
+
+namespace Test_mm_mask_max_sd_valid {
+constexpr __m128d result = _mm_mask_max_sd((__m128d)(__v2df){1.0, 2.0}, 1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0});
+TEST_CONSTEXPR(match_m128d(result, 100.0, 20.0));
+}
+
+namespace Test_mm_mask_min_ss_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_ss(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_min_ss_pos_inf {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_ss(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_min_ss_neg_inf {
+constexpr __m128 a = (__m128)(__v4sf){-__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_ss(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+namespace Test_mm_maskz_min_ss_valid {
+constexpr __m128 result = _mm_maskz_min_ss(1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maskz_max_ss_valid {
+constexpr __m128 result = _mm_maskz_max_ss(1, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maskz_min_sd_valid {
+constexpr __m128d result = _mm_maskz_min_sd(1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0});
+TEST_CONSTEXPR(match_m128d(result, 10.0, 20.0));
+}
+
+namespace Test_mm_maskz_max_sd_valid {
+constexpr __m128d result = _mm_maskz_max_sd(1, (__m128d)(__v2df){10.0, 20.0}, (__m128d)(__v2df){100.0, 200.0});
+TEST_CONSTEXPR(match_m128d(result, 100.0, 20.0));
+}
+
+namespace Test_mm_maskz_min_ss_mask_zero {
+constexpr __m128 result = _mm_maskz_min_ss(0, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 0.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_mask_min_ss_mask_zero {
+constexpr __m128 result = _mm_mask_min_ss((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}, 0, (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}, (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f});
+TEST_CONSTEXPR(match_m128(result, 1.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maskz_min_ss_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_min_ss(1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maskz_max_sd_nan {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_nan(""), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_max_sd(1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_max_ss_pos_inf {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_max_ss(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_max_sd_neg_inf {
+constexpr __m128d a = (__m128d)(__v2df){-__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_max_sd(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
+}
>From d88e6cf84d2189462b71477973842286a21aef46 Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Tue, 3 Feb 2026 09:06:21 -0800
Subject: [PATCH 8/9] Address code review comments
---
clang/include/clang/Basic/BuiltinsX86.td | 4 ++--
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 +--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b2d37f6997991..4e580de73ca91 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -173,8 +173,6 @@ let Features = "sse2", Attributes = [NoThrow] in {
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
- def minsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
- def maxsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
}
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
@@ -245,6 +243,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def minpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
def maxpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
+ def minsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
+ def maxsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 17836d369f22a..7afa045bc047f 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2560,7 +2560,7 @@ static bool interp__builtin_scalar_fp_round_mask_binop(
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
unsigned NumElems = VT->getNumElements();
- uint64_t Rounding = popToUInt64(S, Call->getArg(4));
+ APSInt RoundingMode = popToAPSInt(S, Call->getArg(4));
uint64_t MaskVal = popToUInt64(S, Call->getArg(3));
const Pointer &SrcPtr = S.Stk.pop<Pointer>();
const Pointer &BPtr = S.Stk.pop<Pointer>();
@@ -2572,7 +2572,6 @@ static bool interp__builtin_scalar_fp_round_mask_binop(
if (MaskVal & 1) {
APFloat ElemA = APtr.elem<T>(0).getAPFloat();
APFloat ElemB = BPtr.elem<T>(0).getAPFloat();
- APSInt RoundingMode(APInt(32, Rounding), /*isUnsigned=*/true);
std::optional<APFloat> Result = Fn(ElemA, ElemB, RoundingMode);
if (!Result)
return false;
>From 4bffa36614af9c53e36d268ecbcfd86b60d5159a Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <pnagato at protonmail.com>
Date: Thu, 5 Feb 2026 10:01:58 -0800
Subject: [PATCH 9/9] Add constexpr tests for min/max_ss/sd
---
.../constexpr-x86-avx512f-builtins.cpp | 416 ++++++++++++++++++
1 file changed, 416 insertions(+)
diff --git a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
index 6c0ccb06e3008..95d26bc44b9a3 100644
--- a/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
+++ b/clang/test/SemaCXX/constexpr-x86-avx512f-builtins.cpp
@@ -327,3 +327,419 @@ constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
constexpr __m128d result = _mm_mask_max_sd(src, 1, a, b); // expected-error {{must be initialized by a constant expression}}
}
+
+namespace Test_mm_minss_round_mask_invalid_rounding_8 {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxss_round_mask_invalid_rounding_8 {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxss_round_mask_invalid_rounding_12 {
+constexpr int ROUND_CUR_DIRECTION_NO_EXC = 12;
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minss_round_mask_valid_rounding {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maxss_round_mask_valid_rounding {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_minss_round_mask_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 0, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 1.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maxss_round_mask_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 0, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 1.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_minss_round_mask_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxss_round_mask_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minss_round_mask_pos_infinity {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxss_round_mask_pos_infinity {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minss_round_mask_neg_infinity {
+constexpr __m128 a = (__m128)(__v4sf){-__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_minss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxss_round_mask_neg_infinity {
+constexpr __m128 a = (__m128)(__v4sf){-__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = (__m128)__builtin_ia32_maxss_round_mask((__v4sf)a, (__v4sf)b, (__v4sf)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minsd_round_mask_invalid_rounding_8 {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxsd_round_mask_invalid_rounding_8 {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxsd_round_mask_invalid_rounding_12 {
+constexpr int ROUND_CUR_DIRECTION_NO_EXC = 12;
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minsd_round_mask_valid_rounding {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 10.0, 20.0));
+}
+
+namespace Test_mm_maxsd_round_mask_valid_rounding {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 100.0, 20.0));
+}
+
+namespace Test_mm_minsd_round_mask_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 0, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 1.0, 20.0));
+}
+
+namespace Test_mm_maxsd_round_mask_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 0, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 1.0, 20.0));
+}
+
+namespace Test_mm_minsd_round_mask_nan {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_nan(""), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxsd_round_mask_nan {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_nan(""), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minsd_round_mask_pos_infinity {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxsd_round_mask_pos_infinity {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_minsd_round_mask_neg_infinity {
+constexpr __m128d a = (__m128d)(__v2df){-__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_minsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maxsd_round_mask_neg_infinity {
+constexpr __m128d a = (__m128d)(__v2df){-__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)a, (__v2df)b, (__v2df)src, 1, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_min_round_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_round_ss(src, 1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maskz_min_round_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_min_round_ss(1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f));
+}
+
+// Masked scalar min/max, mask bit 0 set: the low element of the result is
+// min/max(a[0], b[0]); the upper elements are copied from `a`, not `src`
+// (see the asserts below: lanes 1..3 are 20/30/40 from `a`).
+// ROUND_CUR_DIRECTION is the only rounding mode these tests use in constexpr.
+namespace Test_mm_mask_max_round_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_max_round_ss(src, 1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f)); // low: max(10, 100); upper lanes from a
+}
+
+namespace Test_mm_maskz_max_round_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_max_round_ss(1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f));
+}
+
+// Same semantics for the double-precision (_sd) variants: low element is the
+// min/max of a[0] and b[0]; element 1 comes from a.
+namespace Test_mm_mask_min_round_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_min_round_sd(src, 1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 10.0, 20.0)); // low: min(10, 100); upper lane from a
+}
+
+namespace Test_mm_maskz_min_round_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_min_round_sd(1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 10.0, 20.0));
+}
+
+namespace Test_mm_mask_max_round_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_max_round_sd(src, 1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 100.0, 20.0));
+}
+
+namespace Test_mm_maskz_max_round_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_max_round_sd(1, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 100.0, 20.0));
+}
+
+// Masked scalar min/max, mask bit 0 clear: the op result for the low element
+// is discarded — mask_* takes the low element from `src`, maskz_* zeroes it.
+// Upper elements are still copied from `a` in both forms.
+namespace Test_mm_mask_min_round_ss_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_round_ss(src, 0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 1.0f, 20.0f, 30.0f, 40.0f)); // low: src[0]; upper lanes from a
+}
+
+namespace Test_mm_maskz_min_round_ss_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_min_round_ss(0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 0.0f, 20.0f, 30.0f, 40.0f)); // low: zeroed; upper lanes from a
+}
+
+namespace Test_mm_mask_max_round_ss_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_max_round_ss(src, 0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 1.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_maskz_max_round_ss_mask_zero {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_max_round_ss(0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128(result, 0.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_mask_min_round_sd_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_min_round_sd(src, 0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 1.0, 20.0));
+}
+
+namespace Test_mm_maskz_min_round_sd_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_min_round_sd(0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 0.0, 20.0));
+}
+
+namespace Test_mm_mask_max_round_sd_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_max_round_sd(src, 0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 1.0, 20.0));
+}
+
+namespace Test_mm_maskz_max_round_sd_mask_zero {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_max_round_sd(0, a, b, ROUND_CUR_DIRECTION);
+static_assert(match_m128d(result, 0.0, 20.0));
+}
+
+// Negative tests: per this patch, these intrinsics are constexpr only with
+// _MM_FROUND_CUR_DIRECTION — any other rounding immediate (ROUND_NO_EXC here)
+// makes the initializer non-constant.
+namespace Test_mm_mask_min_round_ss_invalid_rounding {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_round_ss(src, 1, a, b, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maskz_max_round_ss_invalid_rounding {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_max_round_ss(1, a, b, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_min_round_sd_invalid_rounding {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_min_round_sd(src, 1, a, b, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maskz_max_round_sd_invalid_rounding {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_max_round_sd(1, a, b, ROUND_NO_EXC); // expected-error {{must be initialized by a constant expression}}
+}
+
+// Negative tests: a NaN or infinity in the low element of `a` is rejected in
+// constant evaluation (the masked round forms diagnose non-finite operands
+// even with ROUND_CUR_DIRECTION, per the expected-error checks below).
+namespace Test_mm_mask_min_round_ss_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 src = (__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f};
+constexpr __m128 result = _mm_mask_min_round_ss(src, 1, a, b, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maskz_max_round_ss_inf {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_maskz_max_round_ss(1, a, b, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_mask_max_round_sd_nan {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_nan(""), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d src = (__m128d)(__v2df){1.0, 2.0};
+constexpr __m128d result = _mm_mask_max_round_sd(src, 1, a, b, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_maskz_min_round_sd_inf {
+constexpr __m128d a = (__m128d)(__v2df){-__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_maskz_min_round_sd(1, a, b, ROUND_CUR_DIRECTION); // expected-error {{must be initialized by a constant expression}}
+}
+
+// Unmasked scalar min/max: the low element of the result is min/max(a[0], b[0]);
+// the upper elements pass through from `a` (lanes 1..3 stay 20/30/40 below).
+namespace Test_mm_min_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_min_ss(a, b);
+static_assert(match_m128(result, 10.0f, 20.0f, 30.0f, 40.0f)); // low: min(10, 100); upper lanes from a
+}
+
+namespace Test_mm_max_ss_valid {
+constexpr __m128 a = (__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_max_ss(a, b);
+static_assert(match_m128(result, 100.0f, 20.0f, 30.0f, 40.0f));
+}
+
+namespace Test_mm_min_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_min_sd(a, b);
+static_assert(match_m128d(result, 10.0, 20.0));
+}
+
+namespace Test_mm_max_sd_valid {
+constexpr __m128d a = (__m128d)(__v2df){10.0, 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_max_sd(a, b);
+static_assert(match_m128d(result, 100.0, 20.0));
+}
+
+// Negative tests: as with the masked round forms above, a NaN or infinity in
+// the low element of `a` makes the unmasked intrinsics non-constexpr.
+namespace Test_mm_min_ss_nan {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_nanf(""), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_min_ss(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_max_ss_inf {
+constexpr __m128 a = (__m128)(__v4sf){__builtin_inff(), 20.0f, 30.0f, 40.0f};
+constexpr __m128 b = (__m128)(__v4sf){100.0f, 200.0f, 300.0f, 400.0f};
+constexpr __m128 result = _mm_max_ss(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_min_sd_nan {
+constexpr __m128d a = (__m128d)(__v2df){__builtin_nan(""), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_min_sd(a, b); // expected-error {{must be initialized by a constant expression}}
+}
+
+namespace Test_mm_max_sd_inf {
+constexpr __m128d a = (__m128d)(__v2df){-__builtin_inf(), 20.0};
+constexpr __m128d b = (__m128d)(__v2df){100.0, 200.0};
+constexpr __m128d result = _mm_max_sd(a, b); // expected-error {{must be initialized by a constant expression}}
+}
More information about the cfe-commits
mailing list