[clang] [X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow SSE/AVX COMI/UCOMI/CMPS/CMPP fp comparison intrinsics to be used in constexpr (PR #182589)
Zaky Hermawan via cfe-commits
cfe-commits at lists.llvm.org
Sat Feb 21 12:16:54 PST 2026
https://github.com/ZakyHermawan updated https://github.com/llvm/llvm-project/pull/182589
>From 121ea64c0b16262f731e24fb2f9dcc3289c8f8d3 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 9 Jan 2026 23:59:22 +0700
Subject: [PATCH 01/14] add _mm_cmp_pd _mm256_cmp_pd
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/include/clang/Basic/BuiltinsX86.td | 29 +++--
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 122 ++++++++++++++++++++
clang/lib/AST/ExprConstant.cpp | 137 ++++++++++++++++++++++-
clang/test/CodeGen/X86/avx-builtins.c | 62 ++++++++++
clang/test/CodeGen/X86/sse2-builtins.c | 84 ++++++++++++++
5 files changed, 421 insertions(+), 13 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 23eac47eb5e4c..b571af5506ed0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -59,7 +59,18 @@ let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features
// SSE intrinsics
let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
- foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
+ foreach Op = ["min", "max"] in {
+ let Features = "sse" in {
+ def Op#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
+ }
+ let Features = "sse2" in {
+ def Op#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
+ }
+ }
+}
+
+let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
+ foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
let Features = "sse" in {
def comi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
def ucomi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
@@ -82,15 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}
- foreach Op = ["min", "max"] in {
- let Features = "sse" in {
- def Op#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
- }
- let Features = "sse2" in {
- def Op#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
- }
- }
-
let Features = "sse" in {
def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
@@ -475,10 +477,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+ def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ac0e854733397..f5d1dbfc3bef5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4119,6 +4119,124 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ llvm::APSInt ImmAPS =
+ popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
+ uint32_t imm = ImmAPS.getZExtValue();
+ const Pointer &VectorB = S.Stk.pop<Pointer>();
+ const Pointer &VectorA = S.Stk.pop<Pointer>();
+ Pointer &Dst = S.Stk.peek<Pointer>();
+
+ bool isScalar = (ID == X86::BI__builtin_ia32_cmpss) ||
+ (ID == X86::BI__builtin_ia32_cmpsd);
+ bool isF64 = (ID == X86::BI__builtin_ia32_cmppd) ||
+ (ID == X86::BI__builtin_ia32_cmpsd) ||
+ (ID == X86::BI__builtin_ia32_cmppd256);
+
+ int NumberOfLaneA = VectorA.getNumElems();
+ int NumberOfLaneB = VectorB.getNumElems();
+ if (NumberOfLaneA != NumberOfLaneB)
+ return false;
+
+  // Returns true when the comparison result between operands A and B
+  // satisfies the predicate selected by the immediate (imm & 0x1F).
+ auto evalCmpImm = [&](uint32_t imm, llvm::APFloatBase::cmpResult cmp) -> bool {
+ using CmpResult = llvm::APFloatBase::cmpResult;
+
+ bool result = false;
+ bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
+ bool isEq = (cmp == CmpResult::cmpEqual);
+ bool isGt = (cmp == CmpResult::cmpGreaterThan);
+ bool isLt = (cmp == CmpResult::cmpLessThan);
+
+ switch (imm & 0x1F) {
+ case 0x00: /* _CMP_EQ_OQ */
+ case 0x10: /* _CMP_EQ_OS */
+ result = isEq && !isUnordered;
+ break;
+ case 0x01: /* _CMP_LT_OS */
+ case 0x11: /* _CMP_LT_OQ */
+ result = isLt && !isUnordered;
+ break;
+ case 0x02: /* _CMP_LE_OS */
+ case 0x12: /* _CMP_LE_OQ */
+ result = !isGt && !isUnordered;
+ break;
+ case 0x03: /* _CMP_UNORD_Q */
+ case 0x13: /* _CMP_UNORD_S */
+ result = isUnordered;
+ break;
+ case 0x04: /* _CMP_NEQ_UQ */
+ case 0x14: /* _CMP_NEQ_US */
+ result = !isEq || isUnordered;
+ break;
+ case 0x05: /* _CMP_NLT_US */
+ case 0x15: /* _CMP_NLT_UQ */
+ result = !isLt || isUnordered;
+ break;
+ case 0x06: /* _CMP_NLE_US */
+ case 0x16: /* _CMP_NLE_UQ */
+ result = isGt || isUnordered;
+ break;
+ case 0x07: /* _CMP_ORD_Q */
+ case 0x17: /* _CMP_ORD_S */
+ result = !isUnordered;
+ break;
+ case 0x08: /* _CMP_EQ_UQ */
+ case 0x18: /* _CMP_EQ_US */
+ result = isEq || isUnordered;
+ break;
+ case 0x09: /* _CMP_NGE_US */
+ case 0x19: /* _CMP_NGE_UQ */
+ result = isLt || isUnordered;
+ break;
+ case 0x0a: /* _CMP_NGT_US */
+ case 0x1a: /* _CMP_NGT_UQ */
+ result = !isGt || isUnordered;
+ break;
+ case 0x0b: /* _CMP_FALSE_OQ */
+ case 0x1b: /* _CMP_FALSE_OS */
+ result = false;
+ break;
+ case 0x0c: /* _CMP_NEQ_OQ */
+ case 0x1c: /* _CMP_NEQ_OS */
+ result = !isEq && !isUnordered;
+ break;
+ case 0x0d: /* _CMP_GE_OS */
+ case 0x1d: /* _CMP_GE_OQ */
+ result = !isLt && !isUnordered;
+ break;
+ case 0x0e: /* _CMP_GT_OS */
+ case 0x1e: /* _CMP_GT_OQ */
+ result = isGt && !isUnordered;
+ break;
+ case 0x0f: /* _CMP_TRUE_UQ */
+ case 0x1f: /* _CMP_TRUE_US */
+ result = true;
+ break;
+ }
+ return result;
+ };
+
+ for (int i = 0; i < NumberOfLaneA; ++i) {
+ llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
+ llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
+ auto CR = AElement.compare(BElement);
+ auto ComparisonResult = evalCmpImm(imm, CR);
+
+ llvm::APFloat True(-1.0);
+ llvm::APFloat False(0.0);
+ if (ComparisonResult)
+ Dst.elem<Floating>(i) = Floating(True);
+ else
+ Dst.elem<Floating>(i) = Floating(False);
+ }
+
+ Dst.initializeAllElements();
+ return true;
+};
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -5879,6 +5997,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::maximum(A, B);
});
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256:
+ return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b06233423db4d..8fecf39ec57ac 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12231,7 +12231,141 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(V, E);
};
- switch (E->getBuiltinCallee()) {
+ auto BuiltinOp = E->getBuiltinCallee();
+ switch (BuiltinOp) {
+
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
+ using CmpResult = llvm::APFloatBase::cmpResult;
+
+ const Expr *A = E->getArg(0);
+ const Expr *B = E->getArg(1);
+ const Expr *Imm = E->getArg(2);
+
+ APValue AV, BV;
+ APSInt ImmVal;
+ if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
+ return false;
+ if (!EvaluateInteger(Imm, ImmVal, Info))
+ return false;
+ unsigned Lanes = AV.getVectorLength();
+ if (Lanes == 0 || BV.getVectorLength() != Lanes)
+ return false;
+
+ QualType RetTy = E->getType();
+ const auto *VT = RetTy->getAs<VectorType>();
+ if (!VT)
+ return false;
+ bool IsF64 =
+ VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double);
+ const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpsd);
+ const uint32_t imm = ImmVal.getZExtValue();
+
+    // Returns true when the comparison result between operands A and B
+    // satisfies the predicate selected by the immediate (imm & 0x1F).
+ auto evalCmpImm = [](uint32_t imm,
+ llvm::APFloatBase::cmpResult cmp) -> bool {
+ bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
+ bool isEq = (cmp == CmpResult::cmpEqual);
+ bool isGt = (cmp == CmpResult::cmpGreaterThan);
+ bool isLt = (cmp == CmpResult::cmpLessThan);
+ bool result = false;
+
+ switch (imm & 0x1F) {
+ case 0x00: /* _CMP_EQ_OQ */
+ case 0x10: /* _CMP_EQ_OS */
+ result = isEq && !isUnordered;
+ break;
+ case 0x01: /* _CMP_LT_OS */
+ case 0x11: /* _CMP_LT_OQ */
+ result = isLt && !isUnordered;
+ break;
+ case 0x02: /* _CMP_LE_OS */
+ case 0x12: /* _CMP_LE_OQ */
+ result = !isGt && !isUnordered;
+ break;
+ case 0x03: /* _CMP_UNORD_Q */
+ case 0x13: /* _CMP_UNORD_S */
+ result = isUnordered;
+ break;
+ case 0x04: /* _CMP_NEQ_UQ */
+ case 0x14: /* _CMP_NEQ_US */
+ result = !isEq || isUnordered;
+ break;
+ case 0x05: /* _CMP_NLT_US */
+ case 0x15: /* _CMP_NLT_UQ */
+ result = !isLt || isUnordered;
+ break;
+ case 0x06: /* _CMP_NLE_US */
+ case 0x16: /* _CMP_NLE_UQ */
+ result = isGt || isUnordered;
+ break;
+ case 0x07: /* _CMP_ORD_Q */
+ case 0x17: /* _CMP_ORD_S */
+ result = !isUnordered;
+ break;
+ case 0x08: /* _CMP_EQ_UQ */
+ case 0x18: /* _CMP_EQ_US */
+ result = isEq || isUnordered;
+ break;
+ case 0x09: /* _CMP_NGE_US */
+ case 0x19: /* _CMP_NGE_UQ */
+ result = isLt || isUnordered;
+ break;
+ case 0x0a: /* _CMP_NGT_US */
+ case 0x1a: /* _CMP_NGT_UQ */
+ result = !isGt || isUnordered;
+ break;
+ case 0x0b: /* _CMP_FALSE_OQ */
+ case 0x1b: /* _CMP_FALSE_OS */
+ result = false;
+ break;
+ case 0x0c: /* _CMP_NEQ_OQ */
+ case 0x1c: /* _CMP_NEQ_OS */
+ result = !isEq && !isUnordered;
+ break;
+ case 0x0d: /* _CMP_GE_OS */
+ case 0x1d: /* _CMP_GE_OQ */
+ result = !isLt && !isUnordered;
+ break;
+ case 0x0e: /* _CMP_GT_OS */
+ case 0x1e: /* _CMP_GT_OQ */
+ result = isGt && !isUnordered;
+ break;
+ case 0x0f: /* _CMP_TRUE_UQ */
+ case 0x1f: /* _CMP_TRUE_US */
+ result = true;
+ break;
+ }
+ return result;
+ };
+
+ std::vector<APValue> results;
+ for (unsigned i = 0; i < Lanes; ++i) {
+ auto AElem = AV.getVectorElt(i);
+ auto BElem = BV.getVectorElt(i);
+
+ llvm::APFloat A0 = AElem.getFloat();
+ llvm::APFloat B0 = BElem.getFloat();
+
+      // TODO: extend to the remaining variants (scalar cmpss/cmpsd and
+      // packed-float cmpps/cmpps256) — currently only cmppd/cmppd256.
+
+ llvm::APFloat::cmpResult CR = A0.compare(B0);
+ auto ComparisonResult = evalCmpImm(imm, CR);
+
+ llvm::APFloat True(-1.0);
+ llvm::APFloat False(0.0);
+
+ if (ComparisonResult)
+ results.push_back(APValue(True));
+ else
+ results.push_back(APValue(False));
+ }
+
+ // construct result
+ APValue retVal(results.data(), results.size());
+ return Success(retVal, E);
+ }
default:
return false;
case Builtin::BI__builtin_elementwise_popcount:
@@ -17478,6 +17612,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
}
return Success(APValue(RetMask), E);
}
+
}
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 6ed4217231119..a0c3a6620af36 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -261,192 +261,254 @@ __m256d test_mm256_cmp_pd_eq_oq(__m256d a, __m256d b) {
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), (((__m256d){1.0, 2.0, 3.0, 5.0})), _CMP_EQ_OQ), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_lt_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_lt_os
// CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LT_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 5.0}), _CMP_LT_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 3.0}), _CMP_LT_OS), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_le_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_le_os
// CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LE_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 3.0}), _CMP_LE_OS), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_unord_q(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_unord_q
// CHECK: fcmp uno <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_UNORD_Q);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_UNORD_Q), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_UNORD_Q), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_neq_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_uq
// CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NEQ_UQ), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_nlt_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nlt_us
// CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLT_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLT_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 5.0}), _CMP_NLT_US), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_nle_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nle_us
// CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLE_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLE_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 4.0}), _CMP_NLE_US), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_ord_q(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ord_q
// CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_ORD_Q);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_Q), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_eq_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_uq
// CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_UQ), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_nge_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nge_us
// CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGE_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGE_US), -1.0, -1.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_ngt_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ngt_us
// CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGT_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGT_US), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGT_US), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_false_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_false_oq
// CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_FALSE_OQ), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_FALSE_OQ), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_neq_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_oq
// CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_NEQ_OQ), 0.0, 0.0, 0.0, -1.0));
__m256d test_mm256_cmp_pd_ge_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ge_os
// CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GE_OS), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_gt_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_gt_os
// CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GT_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GT_OS), -1.0, 0.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_true_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_true_uq
// CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_UQ), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_eq_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_os
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_OS), -1.0, -1.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_lt_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_lt_oq
// CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_LT_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_LT_OQ), -1.0, -1.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_le_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_le_oq
// CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_LE_OQ), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_unord_s(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_unord_s
// CHECK: fcmp uno <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_UNORD_S);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_UNORD_S), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_UNORD_S), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_neq_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_us
// CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_US), 0.0, -1.0, 0.0, -1.0));
__m256d test_mm256_cmp_pd_nlt_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nlt_uq
// CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLT_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLT_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLT_UQ), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_nle_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nle_uq
// CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLE_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLE_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLE_UQ), -1.0, 0.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_ord_s(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ord_s
// CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_ORD_S);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_S), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_eq_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_us
// CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_EQ_US), -1.0, 0.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_nge_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nge_uq
// CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGE_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGE_UQ), -1.0, 0.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_ngt_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ngt_uq
// CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGT_UQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGT_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGT_UQ), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_false_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_false_os
// CHECK: fcmp false <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_FALSE_OS), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 4.0}), _CMP_FALSE_OS), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_neq_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_os
// CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_OS);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_OS), 0.0, -1.0, 0.0, -1.0));
__m256d test_mm256_cmp_pd_ge_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ge_oq
// CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GE_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GE_OQ), -1.0, -1.0, -1.0, -1.0));
__m256d test_mm256_cmp_pd_gt_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_gt_oq
// CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GT_OQ);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GT_OQ), 0.0, 0.0, -1.0, 0.0));
__m256d test_mm256_cmp_pd_true_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_true_us
// CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
}
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_US), -1.0, -1.0, -1.0, -1.0));
__m256 test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_oq
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ab0a857b926f3..c71c466cc2e5f 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -208,48 +208,132 @@ __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OQ), 0.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OQ), -1.0, -1.0));
__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), -1.0, 0.0));
__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), -1.0, -1.0));
__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_UNORD_Q), 0.0, 0.0));
__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_uq
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), -1.0, -1.0));
__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_us
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_q
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_Q), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OQ), 0.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_UQ), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_UNORD_S), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_S), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OS), 0.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OS), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), -1.0, -1.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), 0.0, 0.0));
+
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_US), -1.0, -1.0));
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
>From 0b3306789692c0224bdea1efa688a5facbe14b11 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 10 Jan 2026 01:04:04 +0700
Subject: [PATCH 02/14] DRY and add const type qualifiers
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 113 +++-----------
clang/lib/AST/ExprConstShared.h | 66 +++++++-
clang/lib/AST/ExprConstant.cpp | 190 +++++++----------------
3 files changed, 141 insertions(+), 228 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index f5d1dbfc3bef5..82f7161a750d1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4122,111 +4122,36 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
- llvm::APSInt ImmAPS =
+ const auto ImmAPS =
popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
- uint32_t imm = ImmAPS.getZExtValue();
+ const uint32_t ImmZExt = ImmAPS.getZExtValue();
const Pointer &VectorB = S.Stk.pop<Pointer>();
const Pointer &VectorA = S.Stk.pop<Pointer>();
Pointer &Dst = S.Stk.peek<Pointer>();
- bool isScalar = (ID == X86::BI__builtin_ia32_cmpss) ||
- (ID == X86::BI__builtin_ia32_cmpsd);
- bool isF64 = (ID == X86::BI__builtin_ia32_cmppd) ||
- (ID == X86::BI__builtin_ia32_cmpsd) ||
- (ID == X86::BI__builtin_ia32_cmppd256);
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpss) ||
+ (ID == X86::BI__builtin_ia32_cmpsd);
+ const bool IsF64 = (ID == X86::BI__builtin_ia32_cmppd) ||
+ (ID == X86::BI__builtin_ia32_cmpsd) ||
+ (ID == X86::BI__builtin_ia32_cmppd256);
- int NumberOfLaneA = VectorA.getNumElems();
- int NumberOfLaneB = VectorB.getNumElems();
- if (NumberOfLaneA != NumberOfLaneB)
+ const auto NumLanes = VectorA.getNumElems();
+ if (NumLanes != VectorB.getNumElems())
return false;
- // Return true if immediate and the comparison result (between operand a and b) are matching
- auto evalCmpImm = [&](uint32_t imm, llvm::APFloatBase::cmpResult cmp) -> bool {
- using CmpResult = llvm::APFloatBase::cmpResult;
-
- bool result = false;
- bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
- bool isEq = (cmp == CmpResult::cmpEqual);
- bool isGt = (cmp == CmpResult::cmpGreaterThan);
- bool isLt = (cmp == CmpResult::cmpLessThan);
-
- switch (imm & 0x1F) {
- case 0x00: /* _CMP_EQ_OQ */
- case 0x10: /* _CMP_EQ_OS */
- result = isEq && !isUnordered;
- break;
- case 0x01: /* _CMP_LT_OS */
- case 0x11: /* _CMP_LT_OQ */
- result = isLt && !isUnordered;
- break;
- case 0x02: /* _CMP_LE_OS */
- case 0x12: /* _CMP_LE_OQ */
- result = !isGt && !isUnordered;
- break;
- case 0x03: /* _CMP_UNORD_Q */
- case 0x13: /* _CMP_UNORD_S */
- result = isUnordered;
- break;
- case 0x04: /* _CMP_NEQ_UQ */
- case 0x14: /* _CMP_NEQ_US */
- result = !isEq || isUnordered;
- break;
- case 0x05: /* _CMP_NLT_US */
- case 0x15: /* _CMP_NLT_UQ */
- result = !isLt || isUnordered;
- break;
- case 0x06: /* _CMP_NLE_US */
- case 0x16: /* _CMP_NLE_UQ */
- result = isGt || isUnordered;
- break;
- case 0x07: /* _CMP_ORD_Q */
- case 0x17: /* _CMP_ORD_S */
- result = !isUnordered;
- break;
- case 0x08: /* _CMP_EQ_UQ */
- case 0x18: /* _CMP_EQ_US */
- result = isEq || isUnordered;
- break;
- case 0x09: /* _CMP_NGE_US */
- case 0x19: /* _CMP_NGE_UQ */
- result = isLt || isUnordered;
- break;
- case 0x0a: /* _CMP_NGT_US */
- case 0x1a: /* _CMP_NGT_UQ */
- result = !isGt || isUnordered;
- break;
- case 0x0b: /* _CMP_FALSE_OQ */
- case 0x1b: /* _CMP_FALSE_OS */
- result = false;
- break;
- case 0x0c: /* _CMP_NEQ_OQ */
- case 0x1c: /* _CMP_NEQ_OS */
- result = !isEq && !isUnordered;
- break;
- case 0x0d: /* _CMP_GE_OS */
- case 0x1d: /* _CMP_GE_OQ */
- result = !isLt && !isUnordered;
- break;
- case 0x0e: /* _CMP_GT_OS */
- case 0x1e: /* _CMP_GT_OQ */
- result = isGt && !isUnordered;
- break;
- case 0x0f: /* _CMP_TRUE_UQ */
- case 0x1f: /* _CMP_TRUE_US */
- result = true;
- break;
- }
- return result;
- };
-
- for (int i = 0; i < NumberOfLaneA; ++i) {
+ for (unsigned int i = 0; i < NumLanes; ++i) {
llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
- auto CR = AElement.compare(BElement);
- auto ComparisonResult = evalCmpImm(imm, CR);
- llvm::APFloat True(-1.0);
- llvm::APFloat False(0.0);
+ auto CR = AElement.compare(BElement);
+ const FPCompareFlags CF{/*IsUnordered=*/CR == llvm::APFloatBase::cmpUnordered,
+ /*IsEq=*/CR == llvm::APFloatBase::cmpEqual,
+ /*IsGt=*/CR == llvm::APFloatBase::cmpGreaterThan,
+ /*IsLt=*/CR == llvm::APFloatBase::cmpLessThan};
+ const auto ComparisonResult = MatchesPredicate(ImmZExt, CF);
+
+ const llvm::APFloat True(-1.0);
+ const llvm::APFloat False(0.0);
if (ComparisonResult)
Dst.elem<Floating>(i) = Floating(True);
else
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index 550b36c232161..531adac376df4 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -21,14 +21,78 @@ namespace llvm {
class APFloat;
class APSInt;
class APInt;
-}
+} // namespace llvm
+
namespace clang {
class QualType;
class LangOptions;
class ASTContext;
class CharUnits;
class Expr;
+
+struct FPCompareFlags {
+ bool IsUnordered;
+ bool IsEq;
+ bool IsGt;
+ bool IsLt;
+};
+
+// Return true if immediate and the comparison flags are matching
+static bool MatchesPredicate(const uint32_t Imm, const FPCompareFlags &F) {
+ switch (Imm & 0x1F) {
+ case 0x00: /* _CMP_EQ_OQ */
+ case 0x10: /* _CMP_EQ_OS */
+ return F.IsEq && !F.IsUnordered;
+ case 0x01: /* _CMP_LT_OS */
+ case 0x11: /* _CMP_LT_OQ */
+ return F.IsLt && !F.IsUnordered;
+ case 0x02: /* _CMP_LE_OS */
+ case 0x12: /* _CMP_LE_OQ */
+ return !F.IsGt && !F.IsUnordered;
+ case 0x03: /* _CMP_UNORD_Q */
+ case 0x13: /* _CMP_UNORD_S */
+ return F.IsUnordered;
+ case 0x04: /* _CMP_NEQ_UQ */
+ case 0x14: /* _CMP_NEQ_US */
+ return !F.IsEq || F.IsUnordered;
+ case 0x05: /* _CMP_NLT_US */
+ case 0x15: /* _CMP_NLT_UQ */
+ return !F.IsLt || F.IsUnordered;
+ case 0x06: /* _CMP_NLE_US */
+ case 0x16: /* _CMP_NLE_UQ */
+ return F.IsGt || F.IsUnordered;
+ case 0x07: /* _CMP_ORD_Q */
+ case 0x17: /* _CMP_ORD_S */
+ return !F.IsUnordered;
+ case 0x08: /* _CMP_EQ_UQ */
+ case 0x18: /* _CMP_EQ_US */
+ return F.IsEq || F.IsUnordered;
+ case 0x09: /* _CMP_NGE_US */
+ case 0x19: /* _CMP_NGE_UQ */
+ return F.IsLt || F.IsUnordered;
+ case 0x0a: /* _CMP_NGT_US */
+ case 0x1a: /* _CMP_NGT_UQ */
+ return !F.IsGt || F.IsUnordered;
+ case 0x0b: /* _CMP_FALSE_OQ */
+ case 0x1b: /* _CMP_FALSE_OS */
+ return false;
+ case 0x0c: /* _CMP_NEQ_OQ */
+ case 0x1c: /* _CMP_NEQ_OS */
+ return !F.IsEq && !F.IsUnordered;
+ case 0x0d: /* _CMP_GE_OS */
+ case 0x1d: /* _CMP_GE_OQ */
+ return !F.IsLt && !F.IsUnordered;
+ case 0x0e: /* _CMP_GT_OS */
+ case 0x1e: /* _CMP_GT_OQ */
+ return F.IsGt && !F.IsUnordered;
+ case 0x0f: /* _CMP_TRUE_UQ */
+ case 0x1f: /* _CMP_TRUE_US */
+ return true;
+ }
+ return false;
+};
} // namespace clang
+
using namespace clang;
/// Values returned by __builtin_classify_type, chosen to match the values
/// produced by GCC's builtin.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 8fecf39ec57ac..1962f7e64a3b4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12233,139 +12233,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
auto BuiltinOp = E->getBuiltinCallee();
switch (BuiltinOp) {
-
- case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
- using CmpResult = llvm::APFloatBase::cmpResult;
-
- const Expr *A = E->getArg(0);
- const Expr *B = E->getArg(1);
- const Expr *Imm = E->getArg(2);
-
- APValue AV, BV;
- APSInt ImmVal;
- if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
- return false;
- if (!EvaluateInteger(Imm, ImmVal, Info))
- return false;
- unsigned Lanes = AV.getVectorLength();
- if (Lanes == 0 || BV.getVectorLength() != Lanes)
- return false;
-
- QualType RetTy = E->getType();
- const auto *VT = RetTy->getAs<VectorType>();
- if (!VT)
- return false;
- bool IsF64 =
- VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double);
- const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpsd);
- const uint32_t imm = ImmVal.getZExtValue();
-
- // Return true if immediate and the comparison result (between operand a and b) are matching
- auto evalCmpImm = [](uint32_t imm,
- llvm::APFloatBase::cmpResult cmp) -> bool {
- bool isUnordered = (cmp == llvm::APFloatBase::cmpUnordered);
- bool isEq = (cmp == CmpResult::cmpEqual);
- bool isGt = (cmp == CmpResult::cmpGreaterThan);
- bool isLt = (cmp == CmpResult::cmpLessThan);
- bool result = false;
-
- switch (imm & 0x1F) {
- case 0x00: /* _CMP_EQ_OQ */
- case 0x10: /* _CMP_EQ_OS */
- result = isEq && !isUnordered;
- break;
- case 0x01: /* _CMP_LT_OS */
- case 0x11: /* _CMP_LT_OQ */
- result = isLt && !isUnordered;
- break;
- case 0x02: /* _CMP_LE_OS */
- case 0x12: /* _CMP_LE_OQ */
- result = !isGt && !isUnordered;
- break;
- case 0x03: /* _CMP_UNORD_Q */
- case 0x13: /* _CMP_UNORD_S */
- result = isUnordered;
- break;
- case 0x04: /* _CMP_NEQ_UQ */
- case 0x14: /* _CMP_NEQ_US */
- result = !isEq || isUnordered;
- break;
- case 0x05: /* _CMP_NLT_US */
- case 0x15: /* _CMP_NLT_UQ */
- result = !isLt || isUnordered;
- break;
- case 0x06: /* _CMP_NLE_US */
- case 0x16: /* _CMP_NLE_UQ */
- result = isGt || isUnordered;
- break;
- case 0x07: /* _CMP_ORD_Q */
- case 0x17: /* _CMP_ORD_S */
- result = !isUnordered;
- break;
- case 0x08: /* _CMP_EQ_UQ */
- case 0x18: /* _CMP_EQ_US */
- result = isEq || isUnordered;
- break;
- case 0x09: /* _CMP_NGE_US */
- case 0x19: /* _CMP_NGE_UQ */
- result = isLt || isUnordered;
- break;
- case 0x0a: /* _CMP_NGT_US */
- case 0x1a: /* _CMP_NGT_UQ */
- result = !isGt || isUnordered;
- break;
- case 0x0b: /* _CMP_FALSE_OQ */
- case 0x1b: /* _CMP_FALSE_OS */
- result = false;
- break;
- case 0x0c: /* _CMP_NEQ_OQ */
- case 0x1c: /* _CMP_NEQ_OS */
- result = !isEq && !isUnordered;
- break;
- case 0x0d: /* _CMP_GE_OS */
- case 0x1d: /* _CMP_GE_OQ */
- result = !isLt && !isUnordered;
- break;
- case 0x0e: /* _CMP_GT_OS */
- case 0x1e: /* _CMP_GT_OQ */
- result = isGt && !isUnordered;
- break;
- case 0x0f: /* _CMP_TRUE_UQ */
- case 0x1f: /* _CMP_TRUE_US */
- result = true;
- break;
- }
- return result;
- };
-
- std::vector<APValue> results;
- for (unsigned i = 0; i < Lanes; ++i) {
- auto AElem = AV.getVectorElt(i);
- auto BElem = BV.getVectorElt(i);
-
- llvm::APFloat A0 = AElem.getFloat();
- llvm::APFloat B0 = BElem.getFloat();
-
- // harus suport multiple operands
-
- llvm::APFloat::cmpResult CR = A0.compare(B0);
- auto ComparisonResult = evalCmpImm(imm, CR);
-
- llvm::APFloat True(-1.0);
- llvm::APFloat False(0.0);
-
- if (ComparisonResult)
- results.push_back(APValue(True));
- else
- results.push_back(APValue(False));
- }
-
- // construct result
- APValue retVal(results.data(), results.size());
- return Success(retVal, E);
- }
default:
return false;
case Builtin::BI__builtin_elementwise_popcount:
@@ -14550,6 +14417,63 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
+ using CmpResult = llvm::APFloatBase::cmpResult;
+
+ const Expr *A = E->getArg(0);
+ const Expr *B = E->getArg(1);
+ const Expr *Imm = E->getArg(2);
+
+ APValue AV, BV;
+ APSInt ImmVal;
+ if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
+ return false;
+ if (!EvaluateInteger(Imm, ImmVal, Info))
+ return false;
+
+ const auto NumLanes = AV.getVectorLength();
+ if (NumLanes == 0 || BV.getVectorLength() != NumLanes)
+ return false;
+
+ const auto RetTy = E->getType();
+ const auto *VT = RetTy->getAs<VectorType>();
+ if (!VT)
+ return false;
+
+ const bool IsF64 =
+ VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double);
+ const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpsd);
+ const uint32_t ImmZExt = ImmVal.getZExtValue();
+
+ std::vector<APValue> results;
+ for (unsigned i = 0; i < NumLanes; ++i) {
+ const auto AElem = AV.getVectorElt(i);
+ const auto BElem = BV.getVectorElt(i);
+
+ const auto A0 = AElem.getFloat();
+ const auto B0 = BElem.getFloat();
+
+ const auto CR = A0.compare(B0);
+ const FPCompareFlags CF{/*IsUnordered=*/CR == llvm::APFloatBase::cmpUnordered,
+ /*IsEq=*/CR == CmpResult::cmpEqual,
+ /*IsGt=*/CR == CmpResult::cmpGreaterThan,
+ /*IsLt=*/CR == CmpResult::cmpLessThan};
+
+ const auto ComparisonResult = MatchesPredicate(ImmZExt, CF);
+ const llvm::APFloat True(-1.0);
+ const llvm::APFloat False(0.0);
+
+ if (ComparisonResult)
+ results.push_back(APValue(True));
+ else
+ results.push_back(APValue(False));
+ }
+
+ const APValue retVal(results.data(), results.size());
+ return Success(retVal, E);
+ }
}
}
>From e15d835d7cc8386c31be622c424d3200d1c1d8e5 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 10 Jan 2026 04:39:15 +0700
Subject: [PATCH 03/14] Remove unnecessary struct
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 8 +---
clang/lib/AST/ExprConstShared.h | 54 +++++++++++++++++-------
clang/lib/AST/ExprConstant.cpp | 8 +---
3 files changed, 43 insertions(+), 27 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 82f7161a750d1..0f78070fa8f8d 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4143,12 +4143,8 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
- auto CR = AElement.compare(BElement);
- const FPCompareFlags CF{/*IsUnordered=*/CR == llvm::APFloatBase::cmpUnordered,
- /*IsEq=*/CR == llvm::APFloatBase::cmpEqual,
- /*IsGt=*/CR == llvm::APFloatBase::cmpGreaterThan,
- /*IsLt=*/CR == llvm::APFloatBase::cmpLessThan};
- const auto ComparisonResult = MatchesPredicate(ImmZExt, CF);
+ auto CompareResult = AElement.compare(BElement);
+ const auto ComparisonResult = MatchesPredicate(ImmZExt, CompareResult);
const llvm::APFloat True(-1.0);
const llvm::APFloat False(0.0);
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index 531adac376df4..c716af625a6b4 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_LIB_AST_EXPRCONSTSHARED_H
#include "clang/Basic/TypeTraits.h"
+#include "llvm/ADT/APFloat.h"
#include <cstdint>
namespace llvm {
@@ -38,56 +39,79 @@ struct FPCompareFlags {
};
// Return true if immediate and the comparison flags are matching
-static bool MatchesPredicate(const uint32_t Imm, const FPCompareFlags &F) {
+static bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult) {
+ using CmpResult = llvm::APFloatBase::cmpResult;
+
+ bool IsUnordered = (CompareResult == llvm::APFloatBase::cmpUnordered);
+ bool IsEq = (CompareResult == CmpResult::cmpEqual);
+ bool IsGt = (CompareResult == CmpResult::cmpGreaterThan);
+ bool IsLt = (CompareResult == CmpResult::cmpLessThan);
+
switch (Imm & 0x1F) {
case 0x00: /* _CMP_EQ_OQ */
case 0x10: /* _CMP_EQ_OS */
- return F.IsEq && !F.IsUnordered;
+ return IsEq && !IsUnordered;
+ break;
case 0x01: /* _CMP_LT_OS */
case 0x11: /* _CMP_LT_OQ */
- return F.IsLt && !F.IsUnordered;
+ return IsLt && !IsUnordered;
+ break;
case 0x02: /* _CMP_LE_OS */
case 0x12: /* _CMP_LE_OQ */
- return !F.IsGt && !F.IsUnordered;
+ return !IsGt && !IsUnordered;
+ break;
case 0x03: /* _CMP_UNORD_Q */
case 0x13: /* _CMP_UNORD_S */
- return F.IsUnordered;
+ return IsUnordered;
+ break;
case 0x04: /* _CMP_NEQ_UQ */
case 0x14: /* _CMP_NEQ_US */
- return !F.IsEq || F.IsUnordered;
+ return !IsEq || IsUnordered;
+ break;
case 0x05: /* _CMP_NLT_US */
case 0x15: /* _CMP_NLT_UQ */
- return !F.IsLt || F.IsUnordered;
+ return !IsLt || IsUnordered;
+ break;
case 0x06: /* _CMP_NLE_US */
case 0x16: /* _CMP_NLE_UQ */
- return F.IsGt || F.IsUnordered;
+ return IsGt || IsUnordered;
+ break;
case 0x07: /* _CMP_ORD_Q */
case 0x17: /* _CMP_ORD_S */
- return !F.IsUnordered;
+ return !IsUnordered;
+ break;
case 0x08: /* _CMP_EQ_UQ */
case 0x18: /* _CMP_EQ_US */
- return F.IsEq || F.IsUnordered;
+ return IsEq || IsUnordered;
+ break;
case 0x09: /* _CMP_NGE_US */
case 0x19: /* _CMP_NGE_UQ */
- return F.IsLt || F.IsUnordered;
+ return IsLt || IsUnordered;
+ break;
case 0x0a: /* _CMP_NGT_US */
case 0x1a: /* _CMP_NGT_UQ */
- return !F.IsGt || F.IsUnordered;
+ return !IsGt || IsUnordered;
+ break;
case 0x0b: /* _CMP_FALSE_OQ */
case 0x1b: /* _CMP_FALSE_OS */
return false;
+ break;
case 0x0c: /* _CMP_NEQ_OQ */
case 0x1c: /* _CMP_NEQ_OS */
- return !F.IsEq && !F.IsUnordered;
+ return !IsEq && !IsUnordered;
+ break;
case 0x0d: /* _CMP_GE_OS */
case 0x1d: /* _CMP_GE_OQ */
- return !F.IsLt && !F.IsUnordered;
+ return !IsLt && !IsUnordered;
+ break;
case 0x0e: /* _CMP_GT_OS */
case 0x1e: /* _CMP_GT_OQ */
- return F.IsGt && !F.IsUnordered;
+ return IsGt && !IsUnordered;
+ break;
case 0x0f: /* _CMP_TRUE_UQ */
case 0x1f: /* _CMP_TRUE_US */
return true;
+ break;
}
return false;
};
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1962f7e64a3b4..ead72f9fc014f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14455,13 +14455,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const auto A0 = AElem.getFloat();
const auto B0 = BElem.getFloat();
- const auto CR = A0.compare(B0);
- const FPCompareFlags CF{/*IsUnordered=*/CR == llvm::APFloatBase::cmpUnordered,
- /*IsEq=*/CR == CmpResult::cmpEqual,
- /*IsGt=*/CR == CmpResult::cmpGreaterThan,
- /*IsLt=*/CR == CmpResult::cmpLessThan};
+ const auto CompareResult = A0.compare(B0);
- const auto ComparisonResult = MatchesPredicate(ImmZExt, CF);
+ const auto ComparisonResult = MatchesPredicate(ImmZExt, CompareResult);
const llvm::APFloat True(-1.0);
const llvm::APFloat False(0.0);
>From fe2308f5173a4ad63188db1445d72bdd9845b4b7 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 20 Feb 2026 08:06:52 +0700
Subject: [PATCH 04/14] Add _mm_cmp_ss, _mm_cmp_sd, _mm_cmp_ps, and
_mm256_cmp_ps
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++-
clang/lib/AST/ExprConstant.cpp | 46 ++--
clang/test/CodeGen/X86/avx-builtins.c | 267 ++++++++++++++++++-----
3 files changed, 266 insertions(+), 79 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0f78070fa8f8d..277806b99012b 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4129,26 +4129,36 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const Pointer &VectorA = S.Stk.pop<Pointer>();
Pointer &Dst = S.Stk.peek<Pointer>();
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpss) ||
- (ID == X86::BI__builtin_ia32_cmpsd);
- const bool IsF64 = (ID == X86::BI__builtin_ia32_cmppd) ||
- (ID == X86::BI__builtin_ia32_cmpsd) ||
- (ID == X86::BI__builtin_ia32_cmppd256);
+ const bool IsScalar = ID == X86::BI__builtin_ia32_cmpss ||
+ ID == X86::BI__builtin_ia32_cmpsd;
const auto NumLanes = VectorA.getNumElems();
if (NumLanes != VectorB.getNumElems())
return false;
for (unsigned int i = 0; i < NumLanes; ++i) {
+ // Handle cmpss/cmpsd
+ if (IsScalar && i > 0) {
+      // Copy the upper packed elements from a unchanged (3 floats for cmpss, 1 double for cmpsd)
+ Dst.elem<Floating>(i) = VectorA.elem<Floating>(i);
+ continue;
+ }
+
llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
auto CompareResult = AElement.compare(BElement);
- const auto ComparisonResult = MatchesPredicate(ImmZExt, CompareResult);
+ const bool Matches = MatchesPredicate(ImmZExt, CompareResult);
+
+ // Create bit patterns for comparison results:
+ // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
+ // False = all bits zero
+ const llvm::fltSemantics &Sem = AElement.getSemantics();
+ const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
+ const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
+ const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
- const llvm::APFloat True(-1.0);
- const llvm::APFloat False(0.0);
- if (ComparisonResult)
+ if (Matches)
Dst.elem<Floating>(i) = Floating(True);
else
Dst.elem<Floating>(i) = Floating(False);
@@ -5918,8 +5928,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::maximum(A, B);
});
+ case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpss:
+ case X86::BI__builtin_ia32_cmpsd:
return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
default:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ead72f9fc014f..b3617289e5691 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14417,10 +14417,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
- using CmpResult = llvm::APFloatBase::cmpResult;
-
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpss:
+ case X86::BI__builtin_ia32_cmpsd: {
const Expr *A = E->getArg(0);
const Expr *B = E->getArg(1);
const Expr *Imm = E->getArg(2);
@@ -14441,14 +14443,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!VT)
return false;
- const bool IsF64 =
- VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double);
+ const uint32_t ImmZExt = ImmVal.getZExtValue();
const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpsd);
- const uint32_t ImmZExt = ImmVal.getZExtValue();
- std::vector<APValue> results;
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumLanes);
for (unsigned i = 0; i < NumLanes; ++i) {
+ // Handle cmpss/cmpsd
+ if (IsScalar && i > 0) {
+        // Copy the upper packed elements from a unchanged (3 floats for cmpss, 1 double for cmpsd)
+ ResultElements.push_back(AV.getVectorElt(i));
+ continue;
+ }
+
const auto AElem = AV.getVectorElt(i);
const auto BElem = BV.getVectorElt(i);
@@ -14456,19 +14464,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const auto B0 = BElem.getFloat();
const auto CompareResult = A0.compare(B0);
-
- const auto ComparisonResult = MatchesPredicate(ImmZExt, CompareResult);
- const llvm::APFloat True(-1.0);
- const llvm::APFloat False(0.0);
-
- if (ComparisonResult)
- results.push_back(APValue(True));
+ const bool Matches = MatchesPredicate(ImmZExt, CompareResult);
+
+ // Create bit patterns for comparison results:
+ // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
+ // False = all bits zero
+ const llvm::fltSemantics &Sem = A0.getSemantics();
+ const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
+ const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
+ const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
+
+ if (Matches)
+ ResultElements.push_back(APValue(True));
else
- results.push_back(APValue(False));
+ ResultElements.push_back(APValue(False));
}
- const APValue retVal(results.data(), results.size());
- return Success(retVal, E);
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
}
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index a0c3a6620af36..75e960d672746 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -24,6 +24,9 @@
#include <immintrin.h>
#include "builtin_test_helpers.h"
+#define ALL_ONES_F __builtin_bit_cast(float, 0xFFFFFFFFU)
+#define ALL_ONES_D __builtin_bit_cast(double, 0xFFFFFFFFFFFFFFFFULL)
+
// NOTE: This should match the tests in llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
__m256d test_mm256_add_pd(__m256d A, __m256d B) {
@@ -261,24 +264,24 @@ __m256d test_mm256_cmp_pd_eq_oq(__m256d a, __m256d b) {
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), (((__m256d){1.0, 2.0, 3.0, 5.0})), _CMP_EQ_OQ), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), (((__m256d){1.0, 2.0, 3.0, 5.0})), _CMP_EQ_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_lt_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_lt_os
// CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LT_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 5.0}), _CMP_LT_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 3.0}), _CMP_LT_OS), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 5.0}), _CMP_LT_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){2.0, 3.0, 4.0, 3.0}), _CMP_LT_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_le_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_le_os
// CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LE_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 3.0}), _CMP_LE_OS), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 3.0}), _CMP_LE_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_unord_q(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_unord_q
@@ -287,13 +290,17 @@ __m256d test_mm256_cmp_pd_unord_q(__m256d a, __m256d b) {
}
TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_UNORD_Q), 0.0, 0.0, 0.0, 0.0));
TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_UNORD_Q), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){__builtin_nan(""), 2.0, 3.0, __builtin_nan("")}),
+ ((__m256d){1.0, 2.0, 3.0, 4.0}),
+ _CMP_UNORD_Q),
+ ALL_ONES_D, 0.0, 0.0, ALL_ONES_D));
__m256d test_mm256_cmp_pd_neq_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_uq
// CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NEQ_UQ), 0.0, 0.0, 0.0, 0.0));
__m256d test_mm256_cmp_pd_nlt_us(__m256d a, __m256d b) {
@@ -301,47 +308,51 @@ __m256d test_mm256_cmp_pd_nlt_us(__m256d a, __m256d b) {
// CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLT_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLT_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 5.0}), _CMP_NLT_US), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLT_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 5.0}), _CMP_NLT_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_nle_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nle_us
// CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLE_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLE_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 4.0}), _CMP_NLE_US), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 3.0}), _CMP_NLE_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 1.0, 2.0, 4.0}), _CMP_NLE_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_ord_q(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ord_q
// CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_ORD_Q);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_Q), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_Q), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){__builtin_nan(""), 2.0, 3.0, __builtin_nan("")}),
+ ((__m256d){1.0, 2.0, 3.0, 4.0}),
+ _CMP_ORD_Q),
+ 0.0, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_eq_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_uq
// CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_UQ), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_nge_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nge_us
// CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGE_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGE_US), -1.0, -1.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGE_US), ALL_ONES_D, ALL_ONES_D, 0.0, 0.0));
__m256d test_mm256_cmp_pd_ngt_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ngt_us
// CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGT_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGT_US), -1.0, -1.0, -1.0, 0.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGT_US), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_NGT_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){0.0, 6.0, 3.0, 8.0}), _CMP_NGT_US), 0.0, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_false_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_false_oq
@@ -356,56 +367,56 @@ __m256d test_mm256_cmp_pd_neq_oq(__m256d a, __m256d b) {
// CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_NEQ_OQ), 0.0, 0.0, 0.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_NEQ_OQ), 0.0, 0.0, 0.0, ALL_ONES_D));
__m256d test_mm256_cmp_pd_ge_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ge_os
// CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GE_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GE_OS), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GE_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_gt_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_gt_os
// CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GT_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GT_OS), -1.0, 0.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_GT_OS), ALL_ONES_D, 0.0, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_true_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_true_uq
// CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_eq_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_os
// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_OS), -1.0, -1.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_EQ_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_lt_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_lt_oq
// CHECK: fcmp olt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_LT_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_LT_OQ), -1.0, -1.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_LT_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 3.0, 2.0}), _CMP_LT_OQ), ALL_ONES_D, ALL_ONES_D, 0.0, 0.0));
__m256d test_mm256_cmp_pd_le_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_le_oq
// CHECK: fcmp ole <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 5.0}), _CMP_LE_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_LE_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 3.0}), _CMP_LE_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_unord_s(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_unord_s
@@ -414,61 +425,69 @@ __m256d test_mm256_cmp_pd_unord_s(__m256d a, __m256d b) {
}
TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_UNORD_S), 0.0, 0.0, 0.0, 0.0));
TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_UNORD_S), 0.0, 0.0, 0.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){__builtin_nan(""), 2.0, 3.0, __builtin_nan("")}),
+ ((__m256d){1.0, 2.0, 3.0, 4.0}),
+ _CMP_UNORD_S),
+ ALL_ONES_D, 0.0, 0.0, ALL_ONES_D));
__m256d test_mm256_cmp_pd_neq_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_neq_us
// CHECK: fcmp une <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_US), 0.0, -1.0, 0.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_US), 0.0, ALL_ONES_D, 0.0, ALL_ONES_D));
__m256d test_mm256_cmp_pd_nlt_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nlt_uq
// CHECK: fcmp uge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLT_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLT_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLT_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLT_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLT_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_nle_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nle_uq
// CHECK: fcmp ugt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NLE_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLE_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLE_UQ), -1.0, 0.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_NLE_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NLE_UQ), ALL_ONES_D, 0.0, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_ord_s(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ord_s
// CHECK: fcmp ord <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_ORD_S);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_S), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_ORD_S), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){__builtin_nan(""), 2.0, 3.0, __builtin_nan("")}),
+ ((__m256d){1.0, 2.0, 3.0, 4.0}),
+ _CMP_ORD_S),
+ 0.0, ALL_ONES_D, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_eq_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_eq_us
// CHECK: fcmp ueq <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_EQ_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_EQ_US), -1.0, 0.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_EQ_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_EQ_US), ALL_ONES_D, 0.0, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_nge_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_nge_uq
// CHECK: fcmp ult <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGE_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGE_UQ), -1.0, 0.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGE_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGE_UQ), ALL_ONES_D, 0.0, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_ngt_uq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ngt_uq
// CHECK: fcmp ule <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NGT_UQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGT_UQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGT_UQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NGT_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 2.0, 7.0, 4.0}), _CMP_NGT_UQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_false_os(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_false_os
@@ -483,224 +502,272 @@ __m256d test_mm256_cmp_pd_neq_os(__m256d a, __m256d b) {
// CHECK: fcmp one <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_NEQ_OS);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OS), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_OS), 0.0, -1.0, 0.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_NEQ_OS), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 6.0, 3.0, 8.0}), _CMP_NEQ_OS), 0.0, ALL_ONES_D, 0.0, ALL_ONES_D));
__m256d test_mm256_cmp_pd_ge_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_ge_oq
// CHECK: fcmp oge <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GE_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GE_OQ), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GE_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GE_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256d test_mm256_cmp_pd_gt_oq(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_gt_oq
// CHECK: fcmp ogt <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_GT_OQ);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OQ), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GT_OQ), 0.0, 0.0, -1.0, 0.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_GT_OQ), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){5.0, 6.0, 7.0, 8.0}), ((__m256d){5.0, 6.0, 3.0, 8.0}), _CMP_GT_OQ), 0.0, 0.0, ALL_ONES_D, 0.0));
__m256d test_mm256_cmp_pd_true_us(__m256d a, __m256d b) {
// CHECK-LABEL: test_mm256_cmp_pd_true_us
// CHECK: fcmp true <4 x double> %{{.*}}, %{{.*}}
return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
}
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_US), -1.0, -1.0, -1.0, -1.0));
-TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_US), -1.0, -1.0, -1.0, -1.0));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m256d(_mm256_cmp_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){1.0, 2.0, 3.0, 4.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D, ALL_ONES_D, ALL_ONES_D));
__m256 test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_oq
// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_lt_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_lt_os
// CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LT_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OS), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_le_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_le_os
// CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LE_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_unord_q(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_unord_q
// CHECK: fcmp uno <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_UNORD_Q);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_UNORD_Q), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
+ ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
+ _CMP_UNORD_Q),
+ ALL_ONES_F, 0.0f, 0.0f, ALL_ONES_F, 0.0f, 0.0f, 0.0f, 0.0f));
__m256 test_mm256_cmp_ps_neq_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_neq_uq
// CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_UQ), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nlt_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nlt_us
// CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLT_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nle_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nle_us
// CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLE_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ord_q(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ord_q
// CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_ORD_Q);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_Q), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
+ ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
+ _CMP_ORD_Q),
+ 0.0f, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_eq_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_uq
// CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_UQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_nge_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nge_us
// CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGE_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_US), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_ngt_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ngt_us
// CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGT_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_false_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_false_oq
// CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_FALSE_OQ), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
__m256 test_mm256_cmp_ps_neq_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_neq_oq
// CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OQ), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ge_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ge_os
// CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_gt_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_gt_os
// CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GT_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_true_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_true_uq
// CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_eq_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_os
// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_lt_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_lt_oq
// CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_le_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_le_oq
// CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_unord_s(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_unord_s
// CHECK: fcmp uno <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_UNORD_S);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_UNORD_S), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
+ ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
+ _CMP_UNORD_S),
+ ALL_ONES_F, 0.0f, 0.0f, ALL_ONES_F, 0.0f, 0.0f, 0.0f, 0.0f));
__m256 test_mm256_cmp_ps_neq_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_neq_us
// CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_US), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_nlt_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nlt_uq
// CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLT_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_nle_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nle_uq
// CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLE_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_ord_s(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ord_s
// CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_ORD_S);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_S), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
+ ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
+ _CMP_ORD_S),
+ 0.0f, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_eq_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_us
// CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_US), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_nge_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nge_uq
// CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGE_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_ngt_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ngt_uq
// CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGT_UQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
__m256 test_mm256_cmp_ps_false_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_false_os
// CHECK: fcmp false <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_FALSE_OS), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f));
__m256 test_mm256_cmp_ps_neq_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_neq_os
// CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_OS);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OS), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_ge_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ge_oq
// CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GE_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_gt_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_gt_oq
// CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GT_OQ);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_true_us
// CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
}
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_US), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
__m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_uq
@@ -851,156 +918,250 @@ __m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) {
// CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_EQ_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 5.0f}), _CMP_EQ_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_nge_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nge_us
// CHECK: fcmp ult <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NGE_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NGE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 3.0f, 2.0f}), _CMP_NGE_US), ALL_ONES_F, ALL_ONES_F, 0.0f, 0.0f));
__m128 test_mm_cmp_ps_ngt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ngt_us
// CHECK: fcmp ule <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NGT_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 3.0f, 2.0f}), _CMP_NGT_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){0.0f, 6.0f, 2.0f, 8.0f}), _CMP_NGT_US), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m128 test_mm_cmp_ps_false_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_false_oq
// CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_FALSE_OQ), 0.0f, 0.0f, 0.0f, 0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 3.0f, 2.0f}), _CMP_FALSE_OQ), 0.0f, 0.0f, 0.0f, 0.0f));
__m128 test_mm_cmp_ps_neq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_oq
// CHECK: fcmp one <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NEQ_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 5.0f}), _CMP_NEQ_OQ), 0.0f, 0.0f, 0.0f, ALL_ONES_F));
__m128 test_mm_cmp_ps_ge_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ge_os
// CHECK: fcmp oge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_GE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_GE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_gt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_gt_os
// CHECK: fcmp ogt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_GT_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_GT_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_GT_OS), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_true_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_true_uq
// CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_TRUE_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_TRUE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_TRUE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_eq_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_os
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_EQ_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 5.0f}), _CMP_EQ_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_lt_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_oq
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_LT_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 3.0f, 2.0f}), _CMP_LT_OQ), ALL_ONES_F, ALL_ONES_F, 0.0f, 0.0f));
__m128 test_mm_cmp_ps_le_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_oq
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_LE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 5.0f}), _CMP_LE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_unord_s(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_s
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_S);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_UNORD_S), 0.0f, 0.0f, 0.0f, 0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_UNORD_S), 0.0f, 0.0f, 0.0f, 0.0f));
__m128 test_mm_cmp_ps_neq_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_us
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NEQ_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_NEQ_US), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m128 test_mm_cmp_ps_nlt_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_uq
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_NLT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_NLT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_nle_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_uq
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_NLE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_NLE_UQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_ord_s(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_s
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_S);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_S), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_eq_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_us
// CHECK: fcmp ueq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_EQ_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_EQ_US), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_nge_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nge_uq
// CHECK: fcmp ult <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NGE_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NGE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 2.0f, 7.0f, 4.0f}), _CMP_NGE_UQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_ngt_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ngt_uq
// CHECK: fcmp ule <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NGT_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NGT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 2.0f, 7.0f, 4.0f}), _CMP_NGT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_false_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_false_os
// CHECK: fcmp false <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_FALSE_OS), 0.0f, 0.0f, 0.0f, 0.0f));
__m128 test_mm_cmp_ps_neq_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_os
// CHECK: fcmp one <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NEQ_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 7.0f, 4.0f}), _CMP_NEQ_OS), 0.0f, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_ge_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ge_oq
// CHECK: fcmp oge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_GE_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_GE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_GE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_gt_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_gt_oq
// CHECK: fcmp ogt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_GT_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_GT_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){1.0f, 6.0f, 3.0f, 8.0f}), _CMP_GT_OQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m128 test_mm_cmp_ps_true_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_true_us
// CHECK: fcmp true <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_TRUE_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), _CMP_TRUE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){1.0f, 2.0f, 3.0f, 4.0f}), _CMP_TRUE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 13)
return _mm_cmp_sd(A, B, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_cmp_sd(((__m128d){2.0, __builtin_bit_cast(double, 0x1111111122222222ULL)}),
+ ((__m128d){1.0, __builtin_bit_cast(double, 0xaaaaaaaa55555555ULL)}),
+ _CMP_GE_OS),
+ ALL_ONES_D, __builtin_bit_cast(double, 0x1111111122222222ULL)));
+TEST_CONSTEXPR(match_m128d(
+ _mm_cmp_sd(((__m128d){1.0, __builtin_bit_cast(double, 0x1111111122222222ULL)}),
+ ((__m128d){2.0, __builtin_bit_cast(double, 0xaaaaaaaa55555555ULL)}),
+ _CMP_GE_OS),
+ 0.0, __builtin_bit_cast(double, 0x1111111122222222ULL)));
+TEST_CONSTEXPR(match_m128d(
+ _mm_cmp_sd(((__m128d){__builtin_nan(""), __builtin_bit_cast(double, 0x1111111122222222ULL)}),
+ ((__m128d){1.0, __builtin_bit_cast(double, 0xaaaaaaaa55555555ULL)}),
+ _CMP_UNORD_Q),
+ ALL_ONES_D, __builtin_bit_cast(double, 0x1111111122222222ULL)));
__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cmp_ss
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 13)
return _mm_cmp_ss(A, B, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_cmp_ss(((__m128){2.0f, __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)}),
+ ((__m128){1.0f, __builtin_bit_cast(float, 0xaaaaaaaaU),
+ __builtin_bit_cast(float, 0xbbbbbbbbU),
+ __builtin_bit_cast(float, 0xccccccccU)}),
+ _CMP_GE_OS),
+ ALL_ONES_F, __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)));
+TEST_CONSTEXPR(match_m128(
+ _mm_cmp_ss(((__m128){1.0f, __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)}),
+ ((__m128){2.0f, __builtin_bit_cast(float, 0xaaaaaaaaU),
+ __builtin_bit_cast(float, 0xbbbbbbbbU),
+ __builtin_bit_cast(float, 0xccccccccU)}),
+ _CMP_GE_OS),
+ 0.0f, __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)));
+TEST_CONSTEXPR(match_m128(
+    _mm_cmp_ss(((__m128){__builtin_nanf(""), __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)}),
+ ((__m128){1.0f, __builtin_bit_cast(float, 0xaaaaaaaaU),
+ __builtin_bit_cast(float, 0xbbbbbbbbU),
+ __builtin_bit_cast(float, 0xccccccccU)}),
+ _CMP_UNORD_S),
+ ALL_ONES_F, __builtin_bit_cast(float, 0x11111111U),
+ __builtin_bit_cast(float, 0x22222222U),
+ __builtin_bit_cast(float, 0x33333333U)));
__m256d test_mm256_cvtepi32_pd(__m128i A) {
// CHECK-LABEL: test_mm256_cvtepi32_pd
>From 8251f89e44db12909247c3b80534928b8ebab3e5 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 20 Feb 2026 19:43:04 +0700
Subject: [PATCH 05/14] Implement _mm_cmpeq/ge/gt/le/lt_ss/sd/ps/pd
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/include/clang/Basic/BuiltinsX86.td | 6 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 118 ++++++++++++++++
clang/lib/AST/ExprConstShared.h | 118 +++++++++-------
clang/lib/AST/ExprConstant.cpp | 101 ++++++++++++-
clang/lib/Headers/emmintrin.h | 40 +++---
clang/lib/Headers/xmmintrin.h | 20 +--
clang/test/CodeGen/X86/avx-builtins.c | 3 -
clang/test/CodeGen/X86/builtin_test_helpers.h | 4 +
clang/test/CodeGen/X86/sse-builtins.c | 46 ++++++
clang/test/CodeGen/X86/sse2-builtins.c | 133 +++++++++++++-----
10 files changed, 465 insertions(+), 124 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b571af5506ed0..9a943b41f0159 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -70,7 +70,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
- foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
+ foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in {
let Features = "sse" in {
def comi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
def ucomi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
@@ -80,9 +80,7 @@ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
def ucomisd#Cmp : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
}
}
-
- foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt",
- "cmpnle", "cmpord"] in {
+  foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpgt", "cmpge", "cmpunord",
+                 "cmpneq", "cmpnlt", "cmpnle", "cmpord"] in {
let Features = "sse" in {
def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 277806b99012b..d05beb87bd366 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4168,6 +4168,94 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
return true;
};
+// Helper for X86 floating point vector comparisons using immediate predicates.
+template <uint32_t Imm>
+static bool interp__builtin_x86_cmp_float_vector(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call,
+ unsigned ID, bool IsScalar) {
+ const Pointer &VectorB = S.Stk.pop<Pointer>();
+ const Pointer &VectorA = S.Stk.pop<Pointer>();
+ Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const auto NumLanes = VectorA.getNumElems();
+ if (NumLanes != VectorB.getNumElems())
+ return false;
+
+ for (unsigned int i = 0; i < NumLanes; ++i) {
+ // Handle scalar variants (ss/sd): only first element is compared,
+ // upper elements are copied from first operand
+ if (IsScalar && i > 0) {
+ Dst.elem<Floating>(i) = VectorA.elem<Floating>(i);
+ continue;
+ }
+
+ llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
+ llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
+
+ auto CompareResult = AElement.compare(BElement);
+ const bool Matches = MatchesPredicate(Imm, CompareResult);
+
+ // Create bit patterns for comparison results:
+ // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
+ // False = all bits zero
+ const llvm::fltSemantics &Sem = AElement.getSemantics();
+ const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
+ const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
+ const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
+
+ Dst.elem<Floating>(i) = Floating(Matches ? True : False);
+ }
+
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_x86_cmpeq(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpeqss) ||
+ (ID == X86::BI__builtin_ia32_cmpeqsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_EQ_OQ>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+}
+
+static bool interp__builtin_x86_cmpge(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgess) ||
+ (ID == X86::BI__builtin_ia32_cmpgesd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GE_OS>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+}
+
+static bool interp__builtin_x86_cmpgt(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgtss) ||
+ (ID == X86::BI__builtin_ia32_cmpgtsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GT_OS>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+}
+
+static bool interp__builtin_x86_cmple(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpless) ||
+ (ID == X86::BI__builtin_ia32_cmplesd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LE_OS>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+}
+
+static bool interp__builtin_x86_cmplt(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpltss) ||
+ (ID == X86::BI__builtin_ia32_cmpltsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LT_OS>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -5936,6 +6024,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cmpsd:
return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ return interp__builtin_x86_cmpeq(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpgeps:
+ case X86::BI__builtin_ia32_cmpgepd:
+ case X86::BI__builtin_ia32_cmpgess:
+ case X86::BI__builtin_ia32_cmpgesd:
+ return interp__builtin_x86_cmpge(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpgtps:
+ case X86::BI__builtin_ia32_cmpgtpd:
+ case X86::BI__builtin_ia32_cmpgtss:
+ case X86::BI__builtin_ia32_cmpgtsd:
+ return interp__builtin_x86_cmpgt(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmplesd:
+ return interp__builtin_x86_cmple(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltsd:
+ return interp__builtin_x86_cmplt(S, OpPC, Frame, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index c716af625a6b4..c33d3a3fd684c 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -38,6 +38,44 @@ struct FPCompareFlags {
bool IsLt;
};
+// SSE/AVX floating-point comparison immediates
+namespace X86CmpImm {
+ constexpr uint32_t CMP_EQ_OQ = 0x00; // Equal (ordered, quiet)
+ constexpr uint32_t CMP_LT_OS = 0x01; // Less than (ordered, signaling)
+ constexpr uint32_t CMP_LE_OS = 0x02; // Less than or equal (ordered, signaling)
+ constexpr uint32_t CMP_UNORD_Q = 0x03; // Unordered (quiet)
+ constexpr uint32_t CMP_NEQ_UQ = 0x04; // Not equal (unordered, quiet)
+ constexpr uint32_t CMP_NLT_US = 0x05; // Not less than (unordered, signaling)
+ constexpr uint32_t CMP_NLE_US = 0x06; // Not less than or equal (unordered, signaling)
+ constexpr uint32_t CMP_ORD_Q = 0x07; // Ordered (quiet)
+ constexpr uint32_t CMP_EQ_UQ = 0x08; // Equal (unordered, quiet)
+ constexpr uint32_t CMP_NGE_US = 0x09; // Not greater than or equal (unordered, signaling)
+ constexpr uint32_t CMP_NGT_US = 0x0A; // Not greater than (unordered, signaling)
+ constexpr uint32_t CMP_FALSE_OQ = 0x0B; // False (ordered, quiet)
+ constexpr uint32_t CMP_NEQ_OQ = 0x0C; // Not equal (ordered, quiet)
+ constexpr uint32_t CMP_GE_OS = 0x0D; // Greater than or equal (ordered, signaling)
+ constexpr uint32_t CMP_GT_OS = 0x0E; // Greater than (ordered, signaling)
+ constexpr uint32_t CMP_TRUE_UQ = 0x0F; // True (unordered, quiet)
+
+  // Alternate encodings (0x10-0x1F): same predicates with the quiet/signaling attribute swapped
+ constexpr uint32_t CMP_EQ_OS = 0x10; // Equal (ordered, signaling)
+ constexpr uint32_t CMP_LT_OQ = 0x11; // Less than (ordered, quiet)
+ constexpr uint32_t CMP_LE_OQ = 0x12; // Less than or equal (ordered, quiet)
+ constexpr uint32_t CMP_UNORD_S = 0x13; // Unordered (signaling)
+ constexpr uint32_t CMP_NEQ_US = 0x14; // Not equal (unordered, signaling)
+ constexpr uint32_t CMP_NLT_UQ = 0x15; // Not less than (unordered, quiet)
+ constexpr uint32_t CMP_NLE_UQ = 0x16; // Not less than or equal (unordered, quiet)
+ constexpr uint32_t CMP_ORD_S = 0x17; // Ordered (signaling)
+ constexpr uint32_t CMP_EQ_US = 0x18; // Equal (unordered, signaling)
+ constexpr uint32_t CMP_NGE_UQ = 0x19; // Not greater than or equal (unordered, quiet)
+ constexpr uint32_t CMP_NGT_UQ = 0x1A; // Not greater than (unordered, quiet)
+ constexpr uint32_t CMP_FALSE_OS = 0x1B; // False (ordered, signaling)
+ constexpr uint32_t CMP_NEQ_OS = 0x1C; // Not equal (ordered, signaling)
+ constexpr uint32_t CMP_GE_OQ = 0x1D; // Greater than or equal (ordered, quiet)
+ constexpr uint32_t CMP_GT_OQ = 0x1E; // Greater than (ordered, quiet)
+ constexpr uint32_t CMP_TRUE_US = 0x1F; // True (unordered, signaling)
+} // namespace X86CmpImm
+
// Return true if immediate and the comparison flags are matching
static bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult) {
using CmpResult = llvm::APFloatBase::cmpResult;
@@ -48,70 +86,54 @@ static bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpRes
bool IsLt = (CompareResult == CmpResult::cmpLessThan);
switch (Imm & 0x1F) {
- case 0x00: /* _CMP_EQ_OQ */
- case 0x10: /* _CMP_EQ_OS */
+ case X86CmpImm::CMP_EQ_OQ:
+ case X86CmpImm::CMP_EQ_OS:
return IsEq && !IsUnordered;
- break;
- case 0x01: /* _CMP_LT_OS */
- case 0x11: /* _CMP_LT_OQ */
+ case X86CmpImm::CMP_LT_OS:
+ case X86CmpImm::CMP_LT_OQ:
return IsLt && !IsUnordered;
- break;
- case 0x02: /* _CMP_LE_OS */
- case 0x12: /* _CMP_LE_OQ */
+ case X86CmpImm::CMP_LE_OS:
+ case X86CmpImm::CMP_LE_OQ:
return !IsGt && !IsUnordered;
- break;
- case 0x03: /* _CMP_UNORD_Q */
- case 0x13: /* _CMP_UNORD_S */
+ case X86CmpImm::CMP_UNORD_Q:
+ case X86CmpImm::CMP_UNORD_S:
return IsUnordered;
- break;
- case 0x04: /* _CMP_NEQ_UQ */
- case 0x14: /* _CMP_NEQ_US */
+ case X86CmpImm::CMP_NEQ_UQ:
+ case X86CmpImm::CMP_NEQ_US:
return !IsEq || IsUnordered;
- break;
- case 0x05: /* _CMP_NLT_US */
- case 0x15: /* _CMP_NLT_UQ */
+ case X86CmpImm::CMP_NLT_US:
+ case X86CmpImm::CMP_NLT_UQ:
return !IsLt || IsUnordered;
- break;
- case 0x06: /* _CMP_NLE_US */
- case 0x16: /* _CMP_NLE_UQ */
+ case X86CmpImm::CMP_NLE_US:
+ case X86CmpImm::CMP_NLE_UQ:
return IsGt || IsUnordered;
- break;
- case 0x07: /* _CMP_ORD_Q */
- case 0x17: /* _CMP_ORD_S */
+ case X86CmpImm::CMP_ORD_Q:
+ case X86CmpImm::CMP_ORD_S:
return !IsUnordered;
- break;
- case 0x08: /* _CMP_EQ_UQ */
- case 0x18: /* _CMP_EQ_US */
+ case X86CmpImm::CMP_EQ_UQ:
+ case X86CmpImm::CMP_EQ_US:
return IsEq || IsUnordered;
- break;
- case 0x09: /* _CMP_NGE_US */
- case 0x19: /* _CMP_NGE_UQ */
+ case X86CmpImm::CMP_NGE_US:
+ case X86CmpImm::CMP_NGE_UQ:
return IsLt || IsUnordered;
- break;
- case 0x0a: /* _CMP_NGT_US */
- case 0x1a: /* _CMP_NGT_UQ */
+ case X86CmpImm::CMP_NGT_US:
+ case X86CmpImm::CMP_NGT_UQ:
return !IsGt || IsUnordered;
- break;
- case 0x0b: /* _CMP_FALSE_OQ */
- case 0x1b: /* _CMP_FALSE_OS */
+ case X86CmpImm::CMP_FALSE_OQ:
+ case X86CmpImm::CMP_FALSE_OS:
return false;
- break;
- case 0x0c: /* _CMP_NEQ_OQ */
- case 0x1c: /* _CMP_NEQ_OS */
+ case X86CmpImm::CMP_NEQ_OQ:
+ case X86CmpImm::CMP_NEQ_OS:
return !IsEq && !IsUnordered;
- break;
- case 0x0d: /* _CMP_GE_OS */
- case 0x1d: /* _CMP_GE_OQ */
+ case X86CmpImm::CMP_GE_OS:
+ case X86CmpImm::CMP_GE_OQ:
return !IsLt && !IsUnordered;
- break;
- case 0x0e: /* _CMP_GT_OS */
- case 0x1e: /* _CMP_GT_OQ */
+ case X86CmpImm::CMP_GT_OS:
+ case X86CmpImm::CMP_GT_OQ:
return IsGt && !IsUnordered;
- break;
- case 0x0f: /* _CMP_TRUE_UQ */
- case 0x1f: /* _CMP_TRUE_US */
+ case X86CmpImm::CMP_TRUE_UQ:
+ case X86CmpImm::CMP_TRUE_US:
return true;
- break;
}
return false;
};
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b3617289e5691..404730daf3f41 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14417,12 +14417,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
+ case X86::BI__builtin_ia32_cmpss:
+ case X86::BI__builtin_ia32_cmpsd:
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmppd:
case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd256:
- case X86::BI__builtin_ia32_cmpss:
- case X86::BI__builtin_ia32_cmpsd: {
+ case X86::BI__builtin_ia32_cmppd256: {
const Expr *A = E->getArg(0);
const Expr *B = E->getArg(1);
const Expr *Imm = E->getArg(2);
@@ -14482,6 +14482,101 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpgess:
+ case X86::BI__builtin_ia32_cmpgesd:
+ case X86::BI__builtin_ia32_cmpgeps:
+ case X86::BI__builtin_ia32_cmpgepd:
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd: {
+ const Expr *A = E->getArg(0);
+ const Expr *B = E->getArg(1);
+
+ APValue AV, BV;
+ if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
+ return false;
+
+ const auto NumLanes = AV.getVectorLength();
+ if (NumLanes == 0 || BV.getVectorLength() != NumLanes)
+ return false;
+
+ const auto RetTy = E->getType();
+ const auto *VT = RetTy->getAs<VectorType>();
+ if (!VT)
+ return false;
+
+ const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpeqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmplesd);
+
+ // Select comparison predicate based on builtin
+ uint32_t Imm = X86CmpImm::CMP_EQ_OQ;
+ if ((BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgeps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgepd)) {
+ Imm = X86CmpImm::CMP_GE_OS;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltpd)) {
+ Imm = X86CmpImm::CMP_LT_OS;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpleps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmplepd)) {
+ Imm = X86CmpImm::CMP_LE_OS;
+ }
+
+ SmallVector<APValue, 8> ResultElements;
+ ResultElements.reserve(NumLanes);
+ for (unsigned i = 0; i < NumLanes; ++i) {
+ // Handle scalar variants (ss/sd): only first element is compared,
+ // upper elements are copied from first operand
+ if (IsScalar && i > 0) {
+ ResultElements.push_back(AV.getVectorElt(i));
+ continue;
+ }
+
+ const auto AElem = AV.getVectorElt(i);
+ const auto BElem = BV.getVectorElt(i);
+
+ const auto A0 = AElem.getFloat();
+ const auto B0 = BElem.getFloat();
+
+ const auto CompareResult = A0.compare(B0);
+ const bool Matches = MatchesPredicate(Imm, CompareResult);
+
+ // Create bit patterns for comparison results:
+ // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
+ // False = all bits zero
+ const llvm::fltSemantics &Sem = A0.getSemantics();
+ const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
+ const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
+ const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
+
+ if (Matches)
+ ResultElements.push_back(APValue(True));
+ else
+ ResultElements.push_back(APValue(False));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
}
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 61b35e97314fd..3b4f73855239c 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -437,8 +437,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
}
@@ -458,8 +458,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
}
@@ -479,8 +479,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
}
@@ -500,8 +500,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
}
@@ -521,8 +521,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
}
@@ -694,8 +694,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
}
@@ -719,8 +719,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
}
@@ -744,8 +744,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
}
@@ -769,8 +769,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_sd(__m128d __a,
+ __m128d __b) {
__m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
@@ -795,8 +795,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_sd(__m128d __a,
+ __m128d __b) {
__m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index ab0f0c1690759..064fd9d3d3722 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -500,7 +500,7 @@ _mm_xor_ps(__m128 __a, __m128 __b) {
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpeq_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
@@ -521,7 +521,7 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpeq_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
@@ -547,7 +547,7 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmplt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
@@ -569,7 +569,7 @@ _mm_cmplt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmplt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
@@ -595,7 +595,7 @@ _mm_cmplt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmple_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
@@ -617,7 +617,7 @@ _mm_cmple_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmple_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
@@ -643,7 +643,7 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector((__v4sf)__a,
@@ -667,7 +667,7 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpgt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
@@ -693,7 +693,7 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector((__v4sf)__a,
@@ -717,7 +717,7 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpge_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 75e960d672746..93efdd44fb277 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -24,9 +24,6 @@
#include <immintrin.h>
#include "builtin_test_helpers.h"
-#define ALL_ONES_F __builtin_bit_cast(float, 0xFFFFFFFFU)
-#define ALL_ONES_D __builtin_bit_cast(double, 0xFFFFFFFFFFFFFFFFULL)
-
// NOTE: This should match the tests in llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
__m256d test_mm256_add_pd(__m256d A, __m256d B) {
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index fcaf360626a2d..c25567894af41 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -4,6 +4,10 @@
#if defined(__cplusplus) && (__cplusplus >= 201103L)
+// All-ones bit patterns for comparison results
+#define ALL_ONES_F __builtin_bit_cast(float, 0xFFFFFFFFU)
+#define ALL_ONES_D __builtin_bit_cast(double, 0xFFFFFFFFFFFFFFFFULL)
+
constexpr bool match_m64(__m64 _v, unsigned long long a) {
__v1du v = (__v1du)_v;
return v[0] == a;
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index edd9f00bae2b2..3b986acc8e230 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -108,12 +108,28 @@ __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpeq_ps(__a, __b);
}
+// Test all elements equal - returns 0xFFFFFFFF per element (constexpr executable)
+#ifdef __cplusplus
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){-1.0f, -2.0f, -3.0f, -4.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){-1.0f, -2.0f, -3.0f, -4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+#endif
__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpeq_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
return _mm_cmpeq_ss(__a, __b);
}
+// Test scalar comparisons - only affects lowest element
+#ifdef __cplusplus
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){-1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){-1.0f, -2.0f, -3.0f, -4.0f}, (__m128){-1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, -2.0f, -3.0f, -4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+#endif
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpge_ps
@@ -123,6 +139,11 @@ __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpge_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ps((__m128){+5.0f, +6.0f, +7.0f, +8.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ps((__m128){+5.0f, +2.0f, +7.0f, +4.0f}, (__m128){+3.0f, +6.0f, +5.0f, +8.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ps((__m128){__builtin_nanf(""), +3.0f, +5.0f, +7.0f}, (__m128){+1.0f, +2.0f, +4.0f, +6.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpge_ss
@@ -130,6 +151,11 @@ __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
return _mm_cmpge_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ss((__m128){+5.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ss((__m128){-1.0f, -2.0f, -3.0f, -4.0f}, (__m128){-5.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, -2.0f, -3.0f, -4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpge_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpgt_ps
@@ -139,6 +165,9 @@ __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpgt_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){+3.0f, +5.0f, +1.0f, +9.0f}, (__m128){+1.0f, +5.0f, +2.0f, +4.0f}), ALL_ONES_F, +0.0f, +0.0f, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){-3.0f, +2.0f, +7.0f, +0.0f}, (__m128){-4.0f, +3.0f, +7.0f, -1.0f}), ALL_ONES_F, +0.0f, +0.0f, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){__builtin_nanf(""), +4.0f, +6.0f, +8.0f}, (__m128){+1.0f, +3.0f, +5.0f, +7.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpgt_ss
@@ -146,6 +175,9 @@ __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
return _mm_cmpgt_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){+3.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+3.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmple_ps
@@ -155,12 +187,19 @@ __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmple_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmple_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmple_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmple_ps((__m128){+5.0f, +2.0f, +7.0f, +4.0f}, (__m128){+3.0f, +6.0f, +5.0f, +8.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmple_ps((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmple_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_cmple_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmple_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmple_ss((__m128){+5.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmple_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmplt_ps
@@ -170,12 +209,19 @@ __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmplt_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ps((__m128){+5.0f, +6.0f, +7.0f, +8.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ps((__m128){+1.0f, +6.0f, +3.0f, +8.0f}, (__m128){+5.0f, +2.0f, +7.0f, +4.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ps((__m128){__builtin_nanf(""), +1.0f, +2.0f, +3.0f}, (__m128){+5.0f, +2.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmplt_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
return _mm_cmplt_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ss((__m128){+5.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmplt_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpneq_ps
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index c71c466cc2e5f..eba440a03dd8a 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -209,23 +209,23 @@ __m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
}
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OQ), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_os
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OS);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), -1.0, -1.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), -1.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OS), ALL_ONES_D, 0.0));
__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_os
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OS);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), -1.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +5.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OS), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_q
@@ -239,15 +239,15 @@ __m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), -1.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_UQ), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_us
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_US);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_US), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
@@ -255,7 +255,7 @@ __m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_US);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_US), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
@@ -263,77 +263,77 @@ __m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_Q);
}
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_Q), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_Q), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), 0.0, 0.0));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OQ), 0.0, 0.0));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), -1.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_UQ), -1.0, -1.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), 0.0, 0.0));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_UNORD_S), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), -1.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_S), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_S), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), 0.0, 0.0));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OS), 0.0, 0.0));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OS), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), -1.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), ALL_ONES_D, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_US), -1.0, -1.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_US), -1.0, -1.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
@@ -369,12 +369,40 @@ __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpeq_pd(A, B);
}
+// Test all elements equal - returns 0xFFFFFFFFFFFFFFFF per element (constexpr-evaluable)
+#ifdef __cplusplus
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
+// Test no elements equal - cmpeq returns zeros
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +0.0));
+// Test with first element equal, second not equal
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +3.0}), ALL_ONES_D, +0.0));
+// Test with negative values not equal
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){-1.5, -2.5}, (__m128d){+1.5, +2.5}), +0.0, +0.0));
+// Test second element equal, first not equal
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+0.0, -0.0}, (__m128d){+1.0, -0.0}), +0.0, ALL_ONES_D));
+// Test unordered NaN in lane 0 compares false; equal lane 1 still yields all-ones
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +2.0}), +0.0, ALL_ONES_D));
+#endif
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpeq_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
return _mm_cmpeq_sd(A, B);
}
+// Test scalar equal - replaces lower element with 0xFFFFFFFFFFFFFFFF bits when equal (constexpr executable)
+#ifdef __cplusplus
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +3.0}), ALL_ONES_D, +2.0));
+// Test scalar not equal - replaces lower element with zero, preserves upper
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +2.0));
+// Test scalar not equal with a larger mismatch in the lower element
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+5.0, +3.0}), +0.0, +2.0));
+// Test scalar with negative values not equal
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){-1.5, -2.5}, (__m128d){+1.5, -3.5}), +0.0, -2.5));
+// Test scalar with upper element preserved
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+0.0, +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
+// Test scalar unordered NaN compares false and preserves upper
+TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
+#endif
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpge_pd
@@ -383,6 +411,13 @@ __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpge_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){+3.0, +4.0}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +4.0}), ALL_ONES_D, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){-1.5, -2.5}, (__m128d){+1.5, +2.5}), +0.0, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){+0.0, -0.0}, (__m128d){+1.0, -0.0}), +0.0, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_pd((__m128d){__builtin_nan(""), +5.0}, (__m128d){+1.0, +4.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpge_sd
@@ -393,6 +428,12 @@ __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_cmpge_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +3.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +4.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){-1.5, -2.5}, (__m128d){+1.5, -3.5}), +0.0, -2.5));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){+0.0, +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpge_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
__m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpgt_epi8
@@ -425,6 +466,9 @@ __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpgt_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){+3.0, +5.0}, (__m128d){+1.0, +5.0}), ALL_ONES_D, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){-3.0, +7.0}, (__m128d){-4.0, +8.0}), ALL_ONES_D, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){__builtin_nan(""), +6.0}, (__m128d){+1.0, +5.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_sd
@@ -435,6 +479,9 @@ __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_cmpgt_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_pd
@@ -443,12 +490,19 @@ __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmple_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmple_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmple_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmple_pd((__m128d){+3.0, +5.0}, (__m128d){+1.0, +4.0}), +0.0, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmple_pd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +2.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmple_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_cmple_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmple_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmple_sd((__m128d){+5.0, +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmple_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
__m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmplt_epi8
@@ -481,12 +535,19 @@ __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmplt_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_pd((__m128d){+3.0, +4.0}, (__m128d){+1.0, +2.0}), +0.0, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_pd((__m128d){+1.0, +5.0}, (__m128d){+3.0, +2.0}), ALL_ONES_D, +0.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_pd((__m128d){__builtin_nan(""), +1.0}, (__m128d){+3.0, +2.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmplt_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
return _mm_cmplt_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_sd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmplt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpneq_pd
>From 3123274e470c17930d38b432891983592f67f707 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 20 Feb 2026 21:00:24 +0700
Subject: [PATCH 06/14] Add constexpr support for SSE/SSE2
cmpn*/cmpord/cmpunord builtins
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/include/clang/Basic/BuiltinsX86.td | 3 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 115 ++++++++++++++++++++++-
clang/lib/AST/ExprConstant.cpp | 92 +++++++++++++++++-
clang/lib/Headers/emmintrin.h | 54 +++++------
clang/lib/Headers/xmmintrin.h | 28 +++---
clang/test/CodeGen/X86/sse-builtins.c | 20 ++++
clang/test/CodeGen/X86/sse2-builtins.c | 20 ++++
7 files changed, 283 insertions(+), 49 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 9a943b41f0159..c62e347b3896b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -80,7 +80,8 @@ let Attributes = [Const, Constexpr, NoThrow, RequiredVectorWidth<128>] in {
def ucomisd#Cmp : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
}
}
- foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpgt", "cmpge", "cmpunord", "cmpneq", "cmpnlt", "cmpnle", "cmpord"] in {
+ foreach Cmp = ["cmpeq", "cmpge", "cmpgt", "cmple", "cmplt", "cmpneq",
+ "cmpnge", "cmpngt", "cmpnle", "cmpnlt", "cmpunord", "cmpord"] in {
let Features = "sse" in {
def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index d05beb87bd366..7098cd4a8f892 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4217,7 +4217,7 @@ static bool interp__builtin_x86_cmpeq(InterpState &S, CodePtr OpPC,
const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpeqss) ||
(ID == X86::BI__builtin_ia32_cmpeqsd);
return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_EQ_OQ>(
- S, OpPC, Frame, Call, ID, IsScalar);
+ S, OpPC, Frame, Call, ID, IsScalar);
}
static bool interp__builtin_x86_cmpge(InterpState &S, CodePtr OpPC,
@@ -4226,14 +4226,14 @@ static bool interp__builtin_x86_cmpge(InterpState &S, CodePtr OpPC,
const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgess) ||
(ID == X86::BI__builtin_ia32_cmpgesd);
return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GE_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
+ S, OpPC, Frame, Call, ID, IsScalar);
};
static bool interp__builtin_x86_cmpgt(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgtss) ||
- (ID == X86::BI__builtin_ia32_cmpgtsd);
+ (ID == X86::BI__builtin_ia32_cmpgtsd);
return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GT_OS>(
S, OpPC, Frame, Call, ID, IsScalar);
};
@@ -4244,18 +4244,81 @@ static bool interp__builtin_x86_cmple(InterpState &S, CodePtr OpPC,
const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpless) ||
(ID == X86::BI__builtin_ia32_cmplesd);
return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LE_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
+ S, OpPC, Frame, Call, ID, IsScalar);
};
static bool interp__builtin_x86_cmplt(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpltss) ||
- (ID == X86::BI__builtin_ia32_cmpltsd);
+ (ID == X86::BI__builtin_ia32_cmpltsd);
return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LT_OS>(
S, OpPC, Frame, Call, ID, IsScalar);
};
+static bool interp__builtin_x86_cmpneq(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpneqss) ||
+ (ID == X86::BI__builtin_ia32_cmpneqsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NEQ_UQ>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpnge(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpngess) ||
+ (ID == X86::BI__builtin_ia32_cmpngesd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NGE_US>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpngt(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpngtss) ||
+ (ID == X86::BI__builtin_ia32_cmpngtsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NGT_US>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpnle(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpnless) ||
+ (ID == X86::BI__builtin_ia32_cmpnlesd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NLE_US>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpnlt(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpnltss) ||
+ (ID == X86::BI__builtin_ia32_cmpnltsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NLT_US>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpord(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpordss) ||
+ (ID == X86::BI__builtin_ia32_cmpordsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_ORD_Q>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
+static bool interp__builtin_x86_cmpunord(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpunordss) ||
+ (ID == X86::BI__builtin_ia32_cmpunordsd);
+ return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_UNORD_Q>(
+ S, OpPC, Frame, Call, ID, IsScalar);
+};
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -6054,6 +6117,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cmpltsd:
return interp__builtin_x86_cmplt(S, OpPC, Frame, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ return interp__builtin_x86_cmpneq(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpngeps:
+ case X86::BI__builtin_ia32_cmpngepd:
+ case X86::BI__builtin_ia32_cmpngess:
+ case X86::BI__builtin_ia32_cmpngesd:
+ return interp__builtin_x86_cmpnge(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpngtps:
+ case X86::BI__builtin_ia32_cmpngtpd:
+ case X86::BI__builtin_ia32_cmpngtss:
+ case X86::BI__builtin_ia32_cmpngtsd:
+ return interp__builtin_x86_cmpngt(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ return interp__builtin_x86_cmpnle(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ return interp__builtin_x86_cmpnlt(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordsd:
+ return interp__builtin_x86_cmpord(S, OpPC, Frame, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ return interp__builtin_x86_cmpunord(S, OpPC, Frame, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 404730daf3f41..bb17d50e62582 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14490,6 +14490,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_cmpgesd:
case X86::BI__builtin_ia32_cmpgeps:
case X86::BI__builtin_ia32_cmpgepd:
+ case X86::BI__builtin_ia32_cmpgtss:
+ case X86::BI__builtin_ia32_cmpgtsd:
+ case X86::BI__builtin_ia32_cmpgtps:
+ case X86::BI__builtin_ia32_cmpgtpd:
case X86::BI__builtin_ia32_cmpltss:
case X86::BI__builtin_ia32_cmpltsd:
case X86::BI__builtin_ia32_cmpltps:
@@ -14497,7 +14501,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_cmpless:
case X86::BI__builtin_ia32_cmplesd:
case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmplepd: {
+ case X86::BI__builtin_ia32_cmplepd:
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpngess:
+ case X86::BI__builtin_ia32_cmpngesd:
+ case X86::BI__builtin_ia32_cmpngeps:
+ case X86::BI__builtin_ia32_cmpngepd:
+ case X86::BI__builtin_ia32_cmpngtss:
+ case X86::BI__builtin_ia32_cmpngtsd:
+ case X86::BI__builtin_ia32_cmpngtps:
+ case X86::BI__builtin_ia32_cmpngtpd:
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd: {
const Expr *A = E->getArg(0);
const Expr *B = E->getArg(1);
@@ -14518,10 +14550,26 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
(BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmplesd);
+ (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd);
// Select comparison predicate based on builtin
uint32_t Imm = X86CmpImm::CMP_EQ_OQ;
@@ -14530,6 +14578,11 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
(BuiltinOp == X86::BI__builtin_ia32_cmpgeps) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpgepd)) {
Imm = X86CmpImm::CMP_GE_OS;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtpd)) {
+ Imm = X86CmpImm::CMP_GT_OS;
} else if ((BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpltps) ||
@@ -14540,6 +14593,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
(BuiltinOp == X86::BI__builtin_ia32_cmpleps) ||
(BuiltinOp == X86::BI__builtin_ia32_cmplepd)) {
Imm = X86CmpImm::CMP_LE_OS;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqpd)) {
+ Imm = X86CmpImm::CMP_NEQ_UQ;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngeps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngepd)) {
+ Imm = X86CmpImm::CMP_NGE_US;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtpd)) {
+ Imm = X86CmpImm::CMP_NGT_US;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnleps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnlepd)) {
+ Imm = X86CmpImm::CMP_NLE_US;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltpd)) {
+ Imm = X86CmpImm::CMP_NLT_US;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordpd)) {
+ Imm = X86CmpImm::CMP_ORD_Q;
+ } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordpd)) {
+ Imm = X86CmpImm::CMP_UNORD_Q;
}
SmallVector<APValue, 8> ResultElements;
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 3b4f73855239c..c29c7d830de18 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -543,8 +543,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
}
@@ -566,8 +566,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
}
@@ -587,8 +587,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
}
@@ -608,8 +608,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
}
@@ -629,8 +629,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
}
@@ -650,8 +650,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
}
@@ -671,8 +671,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_pd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
}
@@ -822,8 +822,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
}
@@ -849,8 +849,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
}
@@ -874,8 +874,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
}
@@ -899,8 +899,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
}
@@ -924,8 +924,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_sd(__m128d __a,
+ __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
}
@@ -949,8 +949,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_sd(__m128d __a,
+ __m128d __b) {
__m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
@@ -975,7 +975,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a,
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_sd(__m128d __a,
__m128d __b) {
__m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 064fd9d3d3722..ae595291f888a 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -743,7 +743,7 @@ _mm_cmpge_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpneq_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
@@ -765,7 +765,7 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpneq_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
@@ -792,7 +792,7 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnlt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
@@ -815,7 +815,7 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnlt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
@@ -842,7 +842,7 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnle_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
@@ -865,7 +865,7 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnle_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
@@ -892,7 +892,7 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector((__v4sf)__a,
@@ -917,7 +917,7 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpngt_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
@@ -944,7 +944,7 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector((__v4sf)__a,
@@ -969,7 +969,7 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpnge_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
@@ -996,7 +996,7 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpord_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
@@ -1020,7 +1020,7 @@ _mm_cmpord_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpord_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
@@ -1047,7 +1047,7 @@ _mm_cmpord_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpunord_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
@@ -1071,7 +1071,7 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cmpunord_ps(__m128 __a, __m128 __b)
{
return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 3b986acc8e230..991a496999f71 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -168,6 +168,9 @@ __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){+3.0f, +5.0f, +1.0f, +9.0f}, (__m128){+1.0f, +5.0f, +2.0f, +4.0f}), ALL_ONES_F, +0.0f, +0.0f, ALL_ONES_F));
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){-3.0f, +2.0f, +7.0f, +0.0f}, (__m128){-4.0f, +3.0f, +7.0f, -1.0f}), ALL_ONES_F, +0.0f, +0.0f, ALL_ONES_F));
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ps((__m128){__builtin_nanf(""), +4.0f, +6.0f, +8.0f}, (__m128){+1.0f, +3.0f, +5.0f, +7.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128((__m128)__builtin_ia32_cmpgtps((__v4sf)((__m128){+3.0f, +5.0f, +1.0f, __builtin_nanf("")}),
+ (__v4sf)((__m128){+1.0f, +5.0f, +2.0f, +0.0f})),
+ ALL_ONES_F, +0.0f, +0.0f, +0.0f));
__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpgt_ss
@@ -178,6 +181,9 @@ __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){+3.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+3.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpgt_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128((__m128)__builtin_ia32_cmpgtss((__v4sf)((__m128){+3.0f, +2.0f, +3.0f, +4.0f}),
+ (__v4sf)((__m128){+1.0f, +9.0f, +8.0f, +7.0f})),
+ ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmple_ps
@@ -231,12 +237,14 @@ __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpneq_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpneq_ps((__m128){+1.0f, +2.0f, +3.0f, __builtin_nanf("")}, (__m128){+1.0f, +9.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpneq_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
return _mm_cmpneq_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpneq_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +8.0f, +7.0f, +6.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnge_ps
@@ -246,6 +254,7 @@ __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnge_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnge_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnge_ss
@@ -253,6 +262,7 @@ __m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
return _mm_cmpnge_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnge_ss((__m128){+4.0f, +2.0f, +3.0f, +4.0f}, (__m128){+3.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpngt_ps
@@ -262,6 +272,7 @@ __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpngt_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpngt_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpngt_ss
@@ -269,6 +280,7 @@ __m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
return _mm_cmpngt_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpngt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnle_ps
@@ -278,12 +290,14 @@ __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnle_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnle_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, +0.0f));
__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnle_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
return _mm_cmpnle_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnle_ss((__m128){+2.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnlt_ps
@@ -293,12 +307,14 @@ __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnlt_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnlt_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
return _mm_cmpnlt_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpord_ps
@@ -308,12 +324,14 @@ __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpord_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpord_ps((__m128){+1.0f, __builtin_nanf(""), +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpord_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
return _mm_cmpord_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpord_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpunord_ps
@@ -323,12 +341,14 @@ __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
// CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpunord_ps(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpunord_ps((__m128){+1.0f, __builtin_nanf(""), +3.0f, __builtin_nanf("")}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpunord_ss
// CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
return _mm_cmpunord_ss(__a, __b);
}
+TEST_CONSTEXPR(match_m128(_mm_cmpunord_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
int test_mm_comieq_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comieq_ss
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index eba440a03dd8a..601a41a6d68c2 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -469,6 +469,9 @@ __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){+3.0, +5.0}, (__m128d){+1.0, +5.0}), ALL_ONES_D, +0.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){-3.0, +7.0}, (__m128d){-4.0, +8.0}), ALL_ONES_D, +0.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){__builtin_nan(""), +6.0}, (__m128d){+1.0, +5.0}), +0.0, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)__builtin_ia32_cmpgtpd((__v2df)((__m128d){+3.0, __builtin_nan("")}),
+ (__v2df)((__m128d){+1.0, +2.0})),
+ ALL_ONES_D, +0.0));
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_sd
@@ -482,6 +485,9 @@ __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d((__m128d)__builtin_ia32_cmpgtsd((__v2df)((__m128d){+3.0, +2.0}),
+ (__v2df)((__m128d){+1.0, +9.0})),
+ ALL_ONES_D, +2.0));
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_pd
@@ -556,12 +562,14 @@ __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpneq_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpneq_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+1.0, +4.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpneq_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
return _mm_cmpneq_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpneq_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnge_pd
@@ -570,6 +578,7 @@ __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpnge_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnge_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+1.0, +2.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnge_sd
@@ -580,6 +589,7 @@ __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_cmpnge_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnge_sd((__m128d){+4.0, +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpngt_pd
@@ -588,6 +598,7 @@ __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpngt_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpngt_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpngt_sd
@@ -598,6 +609,7 @@ __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
return _mm_cmpngt_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpngt_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnle_pd
@@ -606,12 +618,14 @@ __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpnle_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnle_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+1.0, +2.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnle_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
return _mm_cmpnle_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnle_sd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnlt_pd
@@ -620,12 +634,14 @@ __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpnlt_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnlt_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnlt_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
return _mm_cmpnlt_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpnlt_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpord_pd
@@ -634,12 +650,14 @@ __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpord_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpord_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+5.0, +6.0}), ALL_ONES_D, +0.0));
__m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpord_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
return _mm_cmpord_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpord_sd((__m128d){+1.0, +2.0}, (__m128d){+5.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpunord_pd
@@ -648,12 +666,14 @@ __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpunord_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpunord_pd((__m128d){+1.0, __builtin_nan("")}, (__m128d){+5.0, +6.0}), +0.0, ALL_ONES_D));
__m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpunord_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
return _mm_cmpunord_sd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_cmpunord_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+5.0, +9.0}), ALL_ONES_D, +2.0));
int test_mm_comieq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comieq_sd
>From bec230b816bb4829c12e266f52d311913292d372 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Fri, 20 Feb 2026 23:19:25 +0700
Subject: [PATCH 07/14] Add more tests
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/test/CodeGen/X86/avx-builtins.c | 173 +++++++++++++++++++++----
clang/test/CodeGen/X86/sse-builtins.c | 135 ++++++++++++++++++-
clang/test/CodeGen/X86/sse2-builtins.c | 168 +++++++++++-------------
3 files changed, 354 insertions(+), 122 deletions(-)
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 93efdd44fb277..ffdf4bef88375 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -531,21 +531,21 @@ __m256 test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) {
// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_lt_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_lt_os
// CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LT_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OS), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_le_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_le_os
// CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LE_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_unord_q(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_unord_q
@@ -563,28 +563,28 @@ __m256 test_mm256_cmp_ps_neq_uq(__m256 a, __m256 b) {
// CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_UQ), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_UQ), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nlt_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nlt_us
// CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLT_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_US), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nle_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nle_us
// CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLE_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_US), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ord_q(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ord_q
// CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_ORD_Q);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_Q), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_Q), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
_CMP_ORD_Q),
@@ -595,21 +595,21 @@ __m256 test_mm256_cmp_ps_eq_uq(__m256 a, __m256 b) {
// CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_UQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_nge_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nge_us
// CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGE_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_US), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_US), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_ngt_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ngt_us
// CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGT_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_US), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_false_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_false_oq
@@ -623,49 +623,49 @@ __m256 test_mm256_cmp_ps_neq_oq(__m256 a, __m256 b) {
// CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OQ), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OQ), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ge_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ge_os
// CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GE_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_gt_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_gt_os
// CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GT_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OS), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_true_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_true_uq
// CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_eq_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_eq_os
// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OS), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_OS), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_lt_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_lt_oq
// CHECK: fcmp olt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_LT_OQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_le_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_le_oq
// CHECK: fcmp ole <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_LE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_unord_s(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_unord_s
@@ -683,28 +683,28 @@ __m256 test_mm256_cmp_ps_neq_us(__m256 a, __m256 b) {
// CHECK: fcmp une <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_US), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_US), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nlt_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nlt_uq
// CHECK: fcmp uge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLT_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_nle_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nle_uq
// CHECK: fcmp ugt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NLE_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_NLE_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ord_s(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ord_s
// CHECK: fcmp ord <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_ORD_S);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_S), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_ORD_S), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){__builtin_nanf(""), 2.0f, 3.0f, __builtin_nanf(""), 5.0f, 6.0f, 7.0f, 8.0f}),
((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}),
_CMP_ORD_S),
@@ -715,21 +715,21 @@ __m256 test_mm256_cmp_ps_eq_us(__m256 a, __m256 b) {
// CHECK: fcmp ueq <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_EQ_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_US), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_EQ_US), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_nge_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_nge_uq
// CHECK: fcmp ult <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGE_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), _CMP_NGE_UQ), ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_ngt_uq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ngt_uq
// CHECK: fcmp ule <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NGT_UQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_UQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){2.0f, 2.0f, 3.0f, 3.0f, 6.0f, 6.0f, 7.0f, 7.0f}), _CMP_NGT_UQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, 0.0f));
__m256 test_mm256_cmp_ps_false_os(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_false_os
@@ -743,172 +743,289 @@ __m256 test_mm256_cmp_ps_neq_os(__m256 a, __m256 b) {
// CHECK: fcmp one <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_NEQ_OS);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OS), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU), 0.0f, __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){1.0f, 2.5f, 3.0f, 4.5f, 5.0f, 6.5f, 7.0f, 8.5f}), _CMP_NEQ_OS), 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F, 0.0f, ALL_ONES_F));
__m256 test_mm256_cmp_ps_ge_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_ge_oq
// CHECK: fcmp oge <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GE_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 2.0f, 4.0f, 4.0f, 6.0f, 6.0f, 8.0f, 8.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GE_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_gt_oq(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_gt_oq
// CHECK: fcmp ogt <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_GT_OQ);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OQ), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), _CMP_GT_OQ), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) {
// CHECK-LABEL: test_mm256_cmp_ps_true_us
// CHECK: fcmp true <8 x float> %{{.*}}, %{{.*}}
return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
}
-TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_US), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU), __builtin_bit_cast(float, 0xFFFFFFFFU)));
+TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f}), _CMP_TRUE_US), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+
+#define CMP256_PS_CASE(A, B, PRED, RES) \
+ TEST_CONSTEXPR(match_m256(_mm256_cmp_ps(((__m256){A, A, A, A, A, A, A, A}), \
+ ((__m256){B, B, B, B, B, B, B, B}), PRED), \
+ RES, RES, RES, RES, RES, RES, RES, RES));
+
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_EQ_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_EQ_OQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_LT_OS, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_LT_OS, +0.0f)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_LE_OS, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_LE_OS, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_UNORD_Q, +0.0f)
+CMP256_PS_CASE(__builtin_nanf(""), +2.0f, _CMP_UNORD_Q, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NEQ_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_NEQ_UQ, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NLT_US, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NLT_US, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NLE_US, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NLE_US, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_ORD_Q, ALL_ONES_F)
+CMP256_PS_CASE(__builtin_nanf(""), +2.0f, _CMP_ORD_Q, +0.0f)
+
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_EQ_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_EQ_UQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NGE_US, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NGE_US, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NGT_US, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NGT_US, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_FALSE_OQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NEQ_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_NEQ_OQ, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_GE_OS, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_GE_OS, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_GT_OS, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_GT_OS, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_TRUE_UQ, ALL_ONES_F)
+
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_EQ_OS, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_EQ_OS, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_LT_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_LT_OQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_LE_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_LE_OQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_UNORD_S, +0.0f)
+CMP256_PS_CASE(__builtin_nanf(""), +2.0f, _CMP_UNORD_S, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NEQ_US, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_NEQ_US, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NLT_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NLT_UQ, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NLE_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NLE_UQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_ORD_S, ALL_ONES_F)
+CMP256_PS_CASE(__builtin_nanf(""), +2.0f, _CMP_ORD_S, +0.0f)
+
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_EQ_US, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_EQ_US, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NGE_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NGE_UQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NGT_UQ, ALL_ONES_F)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_NGT_UQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_FALSE_OS, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_NEQ_OS, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +1.0f, _CMP_NEQ_OS, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_GE_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_GE_OQ, +0.0f)
+CMP256_PS_CASE(+2.0f, +1.0f, _CMP_GT_OQ, ALL_ONES_F)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_GT_OQ, +0.0f)
+CMP256_PS_CASE(+1.0f, +2.0f, _CMP_TRUE_US, ALL_ONES_F)
+
+#undef CMP256_PS_CASE
__m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_uq
// CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_nge_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nge_us
// CHECK: fcmp ult <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NGE_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_ngt_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ngt_us
// CHECK: fcmp ule <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NGT_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_false_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_false_oq
// CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OQ), 0.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_neq_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_oq
// CHECK: fcmp one <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_ge_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ge_os
// CHECK: fcmp oge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_GE_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), 0.0, 0.0));
__m128d test_mm_cmp_pd_gt_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_gt_os
// CHECK: fcmp ogt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_GT_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), 0.0, 0.0));
__m128d test_mm_cmp_pd_true_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_true_uq
// CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_TRUE_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_eq_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_os
// CHECK: fcmp oeq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), 0.0, 0.0));
__m128d test_mm_cmp_pd_lt_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_lt_oq
// CHECK: fcmp olt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LT_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_le_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_le_oq
// CHECK: fcmp ole <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_LE_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_unord_s(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_unord_s
// CHECK: fcmp uno <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_UNORD_S);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_UNORD_S), 0.0, 0.0));
__m128d test_mm_cmp_pd_neq_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_us
// CHECK: fcmp une <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_nlt_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nlt_uq
// CHECK: fcmp uge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLT_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_nle_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nle_uq
// CHECK: fcmp ugt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NLE_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_ord_s(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ord_s
// CHECK: fcmp ord <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_ORD_S);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_S), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_eq_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_eq_us
// CHECK: fcmp ueq <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_EQ_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), 0.0, 0.0));
__m128d test_mm_cmp_pd_nge_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_nge_uq
// CHECK: fcmp ult <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NGE_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_ngt_uq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ngt_uq
// CHECK: fcmp ule <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NGT_UQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_false_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_false_os
// CHECK: fcmp false <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OS), 0.0, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OS), 0.0, 0.0));
__m128d test_mm_cmp_pd_neq_os(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_neq_os
// CHECK: fcmp one <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_NEQ_OS);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, 0.0));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, ALL_ONES_D));
__m128d test_mm_cmp_pd_ge_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_ge_oq
// CHECK: fcmp oge <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_GE_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_gt_oq(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_gt_oq
// CHECK: fcmp ogt <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_GT_OQ);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), 0.0, 0.0));
__m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) {
// CHECK-LABEL: test_mm_cmp_pd_true_us
// CHECK: fcmp true <2 x double> %{{.*}}, %{{.*}}
return _mm_cmp_pd(a, b, _CMP_TRUE_US);
}
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
+TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
__m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_uq
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 991a496999f71..baea2e8718f18 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -51,48 +51,95 @@ __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), _CMP_EQ_OQ),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_EQ_OQ),
+ ALL_ONES_F, +0.0f, +0.0f, +0.0f));
__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_lt_os
// CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LT_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_LT_OS),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+3.0f, +4.0f, +5.0f, +6.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_LT_OS),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_LT_OS),
+ +0.0f, ALL_ONES_F, +0.0f, +0.0f));
__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_le_os
// CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_LE_OS);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+1.0f, +3.0f, +3.0f, +5.0f}), _CMP_LE_OS),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+3.0f, +4.0f, +5.0f, +6.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_LE_OS),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_LE_OS),
+ ALL_ONES_F, ALL_ONES_F, +0.0f, +0.0f));
__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_unord_q
// CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_UNORD_Q),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){__builtin_nanf(""), __builtin_nanf(""), __builtin_nanf(""), __builtin_nanf("")}),
+ ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_UNORD_Q),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_UNORD_Q),
+ +0.0f, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_neq_uq
// CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +2.0f, +4.0f, +4.0f}), _CMP_NEQ_UQ),
+ ALL_ONES_F, +0.0f, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), _CMP_NEQ_UQ),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_NEQ_UQ),
+ +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nlt_us
// CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLT_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+3.0f, +2.0f, +5.0f, +4.0f}), ((__m128){+2.0f, +2.0f, +4.0f, +5.0f}), _CMP_NLT_US),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_NLT_US),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_NLT_US),
+ ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_nle_us
// CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_NLE_US);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+3.0f, +2.0f, +5.0f, +4.0f}), ((__m128){+2.0f, +2.0f, +4.0f, +5.0f}), _CMP_NLE_US),
+ ALL_ONES_F, +0.0f, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_NLE_US),
+ +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_NLE_US),
+ +0.0f, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_ord_q
// CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_ORD_Q),
+ ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}), ((__m128){+2.0f, +3.0f, +4.0f, +5.0f}), _CMP_ORD_Q),
+ +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmp_ps(((__m128){+1.0f, +2.0f, __builtin_nanf(""), +4.0f}), ((__m128){+1.0f, +3.0f, +5.0f, __builtin_nanf("")}), _CMP_ORD_Q),
+ ALL_ONES_F, ALL_ONES_F, +0.0f, +0.0f));
__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_cmp_ss
@@ -100,6 +147,77 @@ __m128 test_mm_cmp_ss(__m128 A, __m128 B) {
return _mm_cmp_ss(A, B, _CMP_ORD_Q);
}
+#define CMP_SS_CASE(A0, B0, PRED, RES0) \
+ TEST_CONSTEXPR(match_m128(_mm_cmp_ss(((__m128){A0, +9.0f, +8.0f, +7.0f}), \
+ ((__m128){B0, +6.0f, +5.0f, +4.0f}), PRED), \
+ RES0, +9.0f, +8.0f, +7.0f));
+
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_EQ_OQ, +0.0f)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_EQ_OQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_LT_OS, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_LT_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_LE_OS, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_LE_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_UNORD_Q, +0.0f)
+CMP_SS_CASE(__builtin_nanf(""), +2.0f, _CMP_UNORD_Q, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NEQ_UQ, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_NEQ_UQ, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NLT_US, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NLT_US, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NLE_US, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NLE_US, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_ORD_Q, ALL_ONES_F)
+CMP_SS_CASE(__builtin_nanf(""), +2.0f, _CMP_ORD_Q, +0.0f)
+
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_EQ_UQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_EQ_UQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NGE_US, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NGE_US, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NGT_US, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NGT_US, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_FALSE_OQ, +0.0f)
+CMP_SS_CASE(+1.0f, +1.0f, _CMP_FALSE_OQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NEQ_OQ, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_NEQ_OQ, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_GE_OS, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_GE_OS, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_GT_OS, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_GT_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_TRUE_UQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +1.0f, _CMP_TRUE_UQ, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_EQ_OS, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_EQ_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_LT_OQ, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_LT_OQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_LE_OQ, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_LE_OQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_UNORD_S, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NEQ_US, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_NEQ_US, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NLT_UQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NLT_UQ, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NLE_UQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NLE_UQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_ORD_S, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_EQ_US, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_EQ_US, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NGE_UQ, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NGE_UQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NGT_UQ, ALL_ONES_F)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_NGT_UQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_FALSE_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +1.0f, _CMP_FALSE_OS, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_NEQ_OS, ALL_ONES_F)
+CMP_SS_CASE(+2.0f, +2.0f, _CMP_NEQ_OS, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_GE_OQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_GE_OQ, +0.0f)
+CMP_SS_CASE(+3.0f, +2.0f, _CMP_GT_OQ, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_GT_OQ, +0.0f)
+CMP_SS_CASE(+1.0f, +2.0f, _CMP_TRUE_US, ALL_ONES_F)
+CMP_SS_CASE(+1.0f, +1.0f, _CMP_TRUE_US, ALL_ONES_F)
+
+#undef CMP_SS_CASE
+
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpeq_ps
// CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
@@ -109,13 +227,11 @@ __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
return _mm_cmpeq_ps(__a, __b);
}
// Test all elements equal - returns 0xFFFFFFFF per element (constexpr executable)
-#ifdef __cplusplus
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){-1.0f, -2.0f, -3.0f, -4.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){-1.0f, -2.0f, -3.0f, -4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ps((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
-#endif
__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpeq_ss
@@ -123,13 +239,11 @@ __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
return _mm_cmpeq_ss(__a, __b);
}
// Test scalar comparisons - only affects lowest element
-#ifdef __cplusplus
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){-1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +5.0f, +6.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){-1.0f, -2.0f, -3.0f, -4.0f}, (__m128){-1.0f, +5.0f, +6.0f, +7.0f}), ALL_ONES_F, -2.0f, -3.0f, -4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpeq_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
-#endif
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpge_ps
@@ -238,6 +352,7 @@ __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
return _mm_cmpneq_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpneq_ps((__m128){+1.0f, +2.0f, +3.0f, __builtin_nanf("")}, (__m128){+1.0f, +9.0f, +3.0f, +4.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpneq_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +3.0f, +8.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpneq_ss
@@ -245,6 +360,7 @@ __m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
return _mm_cmpneq_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpneq_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +8.0f, +7.0f, +6.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpneq_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +8.0f, +7.0f, +6.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnge_ps
@@ -255,6 +371,7 @@ __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
return _mm_cmpnge_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnge_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, +0.0f, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpnge_ps((__m128){+1.0f, +4.0f, +6.0f, +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, +0.0f, +0.0f, ALL_ONES_F));
__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnge_ss
@@ -263,6 +380,7 @@ __m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
return _mm_cmpnge_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnge_ss((__m128){+4.0f, +2.0f, +3.0f, +4.0f}, (__m128){+3.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpnge_ss((__m128){+2.0f, +2.0f, +3.0f, +4.0f}, (__m128){+3.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpngt_ps
@@ -273,6 +391,7 @@ __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
return _mm_cmpngt_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpngt_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpngt_ps((__m128){+1.0f, +4.0f, +6.0f, +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, ALL_ONES_F, +0.0f, ALL_ONES_F));
__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpngt_ss
@@ -281,6 +400,7 @@ __m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
return _mm_cmpngt_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpngt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpngt_ss((__m128){+4.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnle_ps
@@ -291,6 +411,7 @@ __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
return _mm_cmpnle_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnle_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, +0.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpnle_ps((__m128){+1.0f, +4.0f, +6.0f, +5.0f}, (__m128){+1.0f, +3.0f, +2.0f, +7.0f}), +0.0f, ALL_ONES_F, ALL_ONES_F, +0.0f));
__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnle_ss
@@ -298,6 +419,7 @@ __m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
return _mm_cmpnle_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnle_ss((__m128){+2.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpnle_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+2.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnlt_ps
@@ -308,6 +430,7 @@ __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
return _mm_cmpnlt_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ps((__m128){+1.0f, +4.0f, __builtin_nanf(""), +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ps((__m128){+1.0f, +4.0f, +6.0f, +7.0f}, (__m128){+1.0f, +5.0f, +2.0f, +7.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpnlt_ss
@@ -315,6 +438,7 @@ __m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
return _mm_cmpnlt_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+1.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpnlt_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+2.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpord_ps
@@ -325,6 +449,7 @@ __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
return _mm_cmpord_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpord_ps((__m128){+1.0f, __builtin_nanf(""), +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), ALL_ONES_F, +0.0f, ALL_ONES_F, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpord_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), ALL_ONES_F, ALL_ONES_F, ALL_ONES_F, ALL_ONES_F));
__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpord_ss
@@ -342,6 +467,7 @@ __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
return _mm_cmpunord_ps(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpunord_ps((__m128){+1.0f, __builtin_nanf(""), +3.0f, __builtin_nanf("")}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, ALL_ONES_F, +0.0f, ALL_ONES_F));
+TEST_CONSTEXPR(match_m128(_mm_cmpunord_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +6.0f, +7.0f, +8.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
// CHECK-LABEL: test_mm_cmpunord_ss
@@ -349,6 +475,7 @@ __m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
return _mm_cmpunord_ss(__a, __b);
}
TEST_CONSTEXPR(match_m128(_mm_cmpunord_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
+TEST_CONSTEXPR(match_m128(_mm_cmpunord_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
int test_mm_comieq_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comieq_ss
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 601a41a6d68c2..ab65cb39a7c2b 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -265,82 +265,83 @@ __m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
}
TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_Q), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_UQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_US), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_US), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OQ), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OQ), ALL_ONES_D, ALL_ONES_D));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OS), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OS), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_UQ), ALL_ONES_D, ALL_ONES_D));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_OS), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LT_OQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_LE_OQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_UNORD_S), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_US), ALL_ONES_D, ALL_ONES_D));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLT_UQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NLE_UQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_ORD_S), ALL_ONES_D, ALL_ONES_D));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+2.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_EQ_US), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGE_UQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_NGT_UQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_FALSE_OS), 0.0, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_FALSE_OS), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +3.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, 0.0));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_NEQ_OS), ALL_ONES_D, ALL_ONES_D));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GE_OQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+3.0, +4.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +2.0}), ((__m128d){+2.0, +3.0}), _CMP_GT_OQ), 0.0, 0.0));
-
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+2.0, +3.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
-TEST_CONSTEXPR(match_m128d((__m128d)_mm_cmp_pd(((__m128d){+1.0, +1.0}), ((__m128d){+1.0, +1.0}), _CMP_TRUE_US), ALL_ONES_D, ALL_ONES_D));
-
__m128d test_mm_cmp_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmp_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
return _mm_cmp_sd(A, B, _CMP_ORD_Q);
}
+#define CMP_SD_CASE(A0, B0, PRED, RES0) \
+ TEST_CONSTEXPR(match_m128d(_mm_cmp_sd(((__m128d){A0, +9.0}), \
+ ((__m128d){B0, +7.0}), PRED), \
+ RES0, +9.0));
+
+CMP_SD_CASE(+1.0, +2.0, _CMP_EQ_OQ, +0.0)
+CMP_SD_CASE(+2.0, +2.0, _CMP_EQ_OQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_LT_OS, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_LT_OS, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_LE_OS, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_LE_OS, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_UNORD_Q, +0.0)
+CMP_SD_CASE(__builtin_nan(""), +2.0, _CMP_UNORD_Q, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NEQ_UQ, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_NEQ_UQ, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NLT_US, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NLT_US, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NLE_US, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NLE_US, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_ORD_Q, ALL_ONES_D)
+CMP_SD_CASE(__builtin_nan(""), +2.0, _CMP_ORD_Q, +0.0)
+
+CMP_SD_CASE(+2.0, +2.0, _CMP_EQ_UQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_EQ_UQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NGE_US, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NGE_US, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NGT_US, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NGT_US, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_FALSE_OQ, +0.0)
+CMP_SD_CASE(+1.0, +1.0, _CMP_FALSE_OQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NEQ_OQ, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_NEQ_OQ, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_GE_OS, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_GE_OS, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_GT_OS, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_GT_OS, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_TRUE_UQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +1.0, _CMP_TRUE_UQ, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_EQ_OS, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_EQ_OS, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_LT_OQ, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_LT_OQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_LE_OQ, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_LE_OQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_UNORD_S, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NEQ_US, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_NEQ_US, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NLT_UQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NLT_UQ, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NLE_UQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NLE_UQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_ORD_S, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_EQ_US, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_EQ_US, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NGE_UQ, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NGE_UQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NGT_UQ, ALL_ONES_D)
+CMP_SD_CASE(+3.0, +2.0, _CMP_NGT_UQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_FALSE_OS, +0.0)
+CMP_SD_CASE(+1.0, +1.0, _CMP_FALSE_OS, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_NEQ_OS, ALL_ONES_D)
+CMP_SD_CASE(+2.0, +2.0, _CMP_NEQ_OS, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_GE_OQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_GE_OQ, +0.0)
+CMP_SD_CASE(+3.0, +2.0, _CMP_GT_OQ, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +2.0, _CMP_GT_OQ, +0.0)
+CMP_SD_CASE(+1.0, +2.0, _CMP_TRUE_US, ALL_ONES_D)
+CMP_SD_CASE(+1.0, +1.0, _CMP_TRUE_US, ALL_ONES_D)
+
+#undef CMP_SD_CASE
+
__m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_cmpeq_epi8
// CHECK: icmp eq <16 x i8>
@@ -369,40 +370,24 @@ __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
// CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
return _mm_cmpeq_pd(A, B);
}
-// Test all elements equal - returns 0xFFFFFFFFFFFFFFFF per element (constexpr executable)
-#ifdef __cplusplus
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +2.0}), ALL_ONES_D, ALL_ONES_D));
-// Test no elements equal - cmpeq returns zeros
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +0.0));
-// Test with first element equal, second not equal
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +3.0}), ALL_ONES_D, +0.0));
-// Test with negative values not equal
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){-1.5, -2.5}, (__m128d){+1.5, +2.5}), +0.0, +0.0));
-// Test second element equal, first not equal
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){+0.0, -0.0}, (__m128d){+1.0, -0.0}), +0.0, ALL_ONES_D));
-// Test unordered NaN compares false
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_pd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +2.0}), +0.0, ALL_ONES_D));
-#endif
__m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpeq_sd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
return _mm_cmpeq_sd(A, B);
}
-// Test scalar equal - replaces lower element with 0xFFFFFFFFFFFFFFFF bits when equal (constexpr executable)
-#ifdef __cplusplus
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +3.0}), ALL_ONES_D, +2.0));
-// Test scalar not equal - replaces lower element with zero, preserves upper
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +0.0, +2.0));
-// Test scalar with different values
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+1.0, +2.0}, (__m128d){+5.0, +3.0}), +0.0, +2.0));
-// Test scalar with negative values not equal
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){-1.5, -2.5}, (__m128d){+1.5, -3.5}), +0.0, -2.5));
-// Test scalar with upper element preserved
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){+0.0, +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
-// Test scalar unordered NaN compares false and preserves upper
TEST_CONSTEXPR(match_m128d(_mm_cmpeq_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +3.0}), +0.0, +2.0));
-#endif
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpge_pd
@@ -470,8 +455,7 @@ TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){+3.0, +5.0}, (__m128d){+1.0, +
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){-3.0, +7.0}, (__m128d){-4.0, +8.0}), ALL_ONES_D, +0.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_pd((__m128d){__builtin_nan(""), +6.0}, (__m128d){+1.0, +5.0}), +0.0, ALL_ONES_D));
TEST_CONSTEXPR(match_m128d((__m128d)__builtin_ia32_cmpgtpd((__v2df)((__m128d){+3.0, __builtin_nan("")}),
- (__v2df)((__m128d){+1.0, +2.0})),
- ALL_ONES_D, +0.0));
+ (__v2df)((__m128d){+1.0, +2.0})), ALL_ONES_D, +0.0));
__m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_sd
@@ -486,8 +470,7 @@ TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+3.0, +2.0}, (__m128d){+1.0, +
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
TEST_CONSTEXPR(match_m128d(_mm_cmpgt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), +0.0, +2.0));
TEST_CONSTEXPR(match_m128d((__m128d)__builtin_ia32_cmpgtsd((__v2df)((__m128d){+3.0, +2.0}),
- (__v2df)((__m128d){+1.0, +9.0})),
- ALL_ONES_D, +2.0));
+ (__v2df)((__m128d){+1.0, +9.0})), ALL_ONES_D, +2.0));
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_pd
@@ -590,6 +573,7 @@ __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
return _mm_cmpnge_sd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_cmpnge_sd((__m128d){+4.0, +2.0}, (__m128d){+3.0, +9.0}), +0.0, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpnge_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+3.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpngt_pd
@@ -610,6 +594,7 @@ __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
return _mm_cmpngt_sd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_cmpngt_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpngt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnle_pd
@@ -626,6 +611,7 @@ __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
return _mm_cmpnle_sd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_cmpnle_sd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpnle_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnlt_pd
@@ -642,6 +628,7 @@ __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
return _mm_cmpnlt_sd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_cmpnlt_sd((__m128d){+1.0, +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpnlt_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+1.0, +9.0}), ALL_ONES_D, +2.0));
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpord_pd
@@ -658,6 +645,7 @@ __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
return _mm_cmpord_sd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_cmpord_sd((__m128d){+1.0, +2.0}, (__m128d){+5.0, +9.0}), ALL_ONES_D, +2.0));
+TEST_CONSTEXPR(match_m128d(_mm_cmpord_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+5.0, +9.0}), +0.0, +2.0));
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpunord_pd
>From 18af7d75fc0f90ddd1e58b3d679698c455a02930 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 00:43:54 +0700
Subject: [PATCH 08/14] [Clang][X86] Add constexpr support for
_mm_comi*/_mm_ucomi* (ss/sd/sh)
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/include/clang/Basic/BuiltinsX86.td | 2 +-
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 433 ++++++++++---------
clang/lib/AST/ExprConstant.cpp | 368 +++++++++-------
clang/lib/Headers/avx512fp16intrin.h | 48 +-
clang/lib/Headers/emmintrin.h | 48 +-
clang/lib/Headers/xmmintrin.h | 24 +-
clang/test/CodeGen/X86/avx512fp16-builtins.c | 85 +++-
clang/test/CodeGen/X86/sse-builtins.c | 42 ++
clang/test/CodeGen/X86/sse2-builtins.c | 41 ++
9 files changed, 671 insertions(+), 420 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index c62e347b3896b..e76006ea2ad4b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -3391,7 +3391,7 @@ let Features = "avx512vp2intersect,avx512vl", Attributes = [NoThrow, RequiredVec
def vp2intersect_d_128 : X86Builtin<"void(_Vector<4, int>, _Vector<4, int>, unsigned char *, unsigned char *)">;
}
-let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512fp16", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vcomish : X86Builtin<"int(_Vector<8, _Float16>, _Vector<8, _Float16>, _Constant int, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 7098cd4a8f892..ae8ca192687f6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4119,18 +4119,195 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_x86_comi(InterpState &S, CodePtr OpPC,
+ const InterpFrame *Frame,
+ const CallExpr *Call, unsigned ID) {
+ uint32_t Predicate;
+ switch (ID) {
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ Predicate = X86CmpImm::CMP_EQ_OQ;
+ break;
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ Predicate = X86CmpImm::CMP_LT_OQ;
+ break;
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_ucomisdle:
+ Predicate = X86CmpImm::CMP_LE_OQ;
+ break;
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ Predicate = X86CmpImm::CMP_GT_OQ;
+ break;
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_ucomisdge:
+ Predicate = X86CmpImm::CMP_GE_OQ;
+ break;
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ Predicate = X86CmpImm::CMP_NEQ_UQ;
+ break;
+ case X86::BI__builtin_ia32_vcomish: {
+ discard(S.Stk, *S.getContext().classify(Call->getArg(3)));
+ const APSInt Imm = popToAPSInt(S, Call->getArg(2));
+ Predicate = Imm.getZExtValue();
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled x86 comi builtin");
+ }
+
+ const Pointer &VectorB = S.Stk.pop<Pointer>();
+ const Pointer &VectorA = S.Stk.pop<Pointer>();
+
+ if (VectorA.getNumElems() == 0 || VectorA.getNumElems() != VectorB.getNumElems())
+ return false;
+
+ const llvm::APFloat A = VectorA.elem<Floating>(0).getAPFloat();
+ const llvm::APFloat B = VectorB.elem<Floating>(0).getAPFloat();
+ const bool Matches = MatchesPredicate(Predicate, A.compare(B));
+ pushInteger(S, Matches, Call->getType());
+ return true;
+}
+
static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
- const auto ImmAPS =
- popToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)));
- const uint32_t ImmZExt = ImmAPS.getZExtValue();
+ const bool HasImmArg =
+ ID == X86::BI__builtin_ia32_cmpps ||
+ ID == X86::BI__builtin_ia32_cmppd ||
+ ID == X86::BI__builtin_ia32_cmpps256 ||
+ ID == X86::BI__builtin_ia32_cmppd256 ||
+ ID == X86::BI__builtin_ia32_cmpss ||
+ ID == X86::BI__builtin_ia32_cmpsd;
+
+ uint32_t Predicate;
+ if (HasImmArg) {
+ const APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
+ Predicate = ImmAPS.getZExtValue();
+ } else {
+ switch (ID) {
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ Predicate = X86CmpImm::CMP_EQ_OQ;
+ break;
+ case X86::BI__builtin_ia32_cmpgess:
+ case X86::BI__builtin_ia32_cmpgesd:
+ case X86::BI__builtin_ia32_cmpgeps:
+ case X86::BI__builtin_ia32_cmpgepd:
+ Predicate = X86CmpImm::CMP_GE_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpgtss:
+ case X86::BI__builtin_ia32_cmpgtsd:
+ case X86::BI__builtin_ia32_cmpgtps:
+ case X86::BI__builtin_ia32_cmpgtpd:
+ Predicate = X86CmpImm::CMP_GT_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ Predicate = X86CmpImm::CMP_LT_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ Predicate = X86CmpImm::CMP_LE_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ Predicate = X86CmpImm::CMP_NEQ_UQ;
+ break;
+ case X86::BI__builtin_ia32_cmpngess:
+ case X86::BI__builtin_ia32_cmpngesd:
+ case X86::BI__builtin_ia32_cmpngeps:
+ case X86::BI__builtin_ia32_cmpngepd:
+ Predicate = X86CmpImm::CMP_NGE_US;
+ break;
+ case X86::BI__builtin_ia32_cmpngtss:
+ case X86::BI__builtin_ia32_cmpngtsd:
+ case X86::BI__builtin_ia32_cmpngtps:
+ case X86::BI__builtin_ia32_cmpngtpd:
+ Predicate = X86CmpImm::CMP_NGT_US;
+ break;
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ Predicate = X86CmpImm::CMP_NLE_US;
+ break;
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ Predicate = X86CmpImm::CMP_NLT_US;
+ break;
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ Predicate = X86CmpImm::CMP_ORD_Q;
+ break;
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ Predicate = X86CmpImm::CMP_UNORD_Q;
+ break;
+ default:
+ llvm_unreachable("unhandled x86 cmp builtin");
+ }
+ }
+
const Pointer &VectorB = S.Stk.pop<Pointer>();
const Pointer &VectorA = S.Stk.pop<Pointer>();
Pointer &Dst = S.Stk.peek<Pointer>();
- const bool IsScalar = ID == X86::BI__builtin_ia32_cmpss ||
- ID == X86::BI__builtin_ia32_cmpsd;
+ const bool IsScalar =
+ ID == X86::BI__builtin_ia32_cmpss ||
+ ID == X86::BI__builtin_ia32_cmpsd ||
+ ID == X86::BI__builtin_ia32_cmpeqss ||
+ ID == X86::BI__builtin_ia32_cmpeqsd ||
+ ID == X86::BI__builtin_ia32_cmpgess ||
+ ID == X86::BI__builtin_ia32_cmpgesd ||
+ ID == X86::BI__builtin_ia32_cmpgtss ||
+ ID == X86::BI__builtin_ia32_cmpgtsd ||
+ ID == X86::BI__builtin_ia32_cmpltss ||
+ ID == X86::BI__builtin_ia32_cmpltsd ||
+ ID == X86::BI__builtin_ia32_cmpless ||
+ ID == X86::BI__builtin_ia32_cmplesd ||
+ ID == X86::BI__builtin_ia32_cmpneqss ||
+ ID == X86::BI__builtin_ia32_cmpneqsd ||
+ ID == X86::BI__builtin_ia32_cmpngess ||
+ ID == X86::BI__builtin_ia32_cmpngesd ||
+ ID == X86::BI__builtin_ia32_cmpngtss ||
+ ID == X86::BI__builtin_ia32_cmpngtsd ||
+ ID == X86::BI__builtin_ia32_cmpnless ||
+ ID == X86::BI__builtin_ia32_cmpnlesd ||
+ ID == X86::BI__builtin_ia32_cmpnltss ||
+ ID == X86::BI__builtin_ia32_cmpnltsd ||
+ ID == X86::BI__builtin_ia32_cmpordss ||
+ ID == X86::BI__builtin_ia32_cmpordsd ||
+ ID == X86::BI__builtin_ia32_cmpunordss ||
+ ID == X86::BI__builtin_ia32_cmpunordsd;
const auto NumLanes = VectorA.getNumElems();
if (NumLanes != VectorB.getNumElems())
@@ -4148,7 +4325,7 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
auto CompareResult = AElement.compare(BElement);
- const bool Matches = MatchesPredicate(ImmZExt, CompareResult);
+ const bool Matches = MatchesPredicate(Predicate, CompareResult);
// Create bit patterns for comparison results:
// True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
@@ -4168,157 +4345,6 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
return true;
};
-// Helper for X86 floating point vector comparisons using immediate predicates.
-template <uint32_t Imm>
-static bool interp__builtin_x86_cmp_float_vector(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call,
- unsigned ID, bool IsScalar) {
- const Pointer &VectorB = S.Stk.pop<Pointer>();
- const Pointer &VectorA = S.Stk.pop<Pointer>();
- Pointer &Dst = S.Stk.peek<Pointer>();
-
- const auto NumLanes = VectorA.getNumElems();
- if (NumLanes != VectorB.getNumElems())
- return false;
-
- for (unsigned int i = 0; i < NumLanes; ++i) {
- // Handle scalar variants (ss/sd): only first element is compared,
- // upper elements are copied from first operand
- if (IsScalar && i > 0) {
- Dst.elem<Floating>(i) = VectorA.elem<Floating>(i);
- continue;
- }
-
- llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
- llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
-
- auto CompareResult = AElement.compare(BElement);
- const bool Matches = MatchesPredicate(Imm, CompareResult);
-
- // Create bit patterns for comparison results:
- // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
- // False = all bits zero
- const llvm::fltSemantics &Sem = AElement.getSemantics();
- const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
- const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
- const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
-
- Dst.elem<Floating>(i) = Floating(Matches ? True : False);
- }
-
- Dst.initializeAllElements();
- return true;
-}
-
-static bool interp__builtin_x86_cmpeq(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpeqss) ||
- (ID == X86::BI__builtin_ia32_cmpeqsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_EQ_OQ>(
- S, OpPC, Frame, Call, ID, IsScalar);
-}
-
-static bool interp__builtin_x86_cmpge(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgess) ||
- (ID == X86::BI__builtin_ia32_cmpgesd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GE_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpgt(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpgtss) ||
- (ID == X86::BI__builtin_ia32_cmpgtsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_GT_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmple(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpless) ||
- (ID == X86::BI__builtin_ia32_cmplesd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LE_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmplt(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpltss) ||
- (ID == X86::BI__builtin_ia32_cmpltsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_LT_OS>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpneq(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpneqss) ||
- (ID == X86::BI__builtin_ia32_cmpneqsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NEQ_UQ>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpnge(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpngess) ||
- (ID == X86::BI__builtin_ia32_cmpngesd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NGE_US>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpngt(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpngtss) ||
- (ID == X86::BI__builtin_ia32_cmpngtsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NGT_US>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpnle(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpnless) ||
- (ID == X86::BI__builtin_ia32_cmpnlesd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NLE_US>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpnlt(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpnltss) ||
- (ID == X86::BI__builtin_ia32_cmpnltsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_NLT_US>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpord(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpordss) ||
- (ID == X86::BI__builtin_ia32_cmpordsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_ORD_Q>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
-static bool interp__builtin_x86_cmpunord(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, unsigned ID) {
- const bool IsScalar = (ID == X86::BI__builtin_ia32_cmpunordss) ||
- (ID == X86::BI__builtin_ia32_cmpunordsd);
- return interp__builtin_x86_cmp_float_vector<X86CmpImm::CMP_UNORD_Q>(
- S, OpPC, Frame, Call, ID, IsScalar);
-};
-
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -6079,85 +6105,88 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return llvm::maximum(A, B);
});
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ case X86::BI__builtin_ia32_ucomisdle:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ case X86::BI__builtin_ia32_ucomisdge:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ case X86::BI__builtin_ia32_vcomish:
+ return interp__builtin_x86_comi(S, OpPC, Frame, Call, BuiltinID);
+
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmppd:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd256:
case X86::BI__builtin_ia32_cmpss:
case X86::BI__builtin_ia32_cmpsd:
- return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqpd:
case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqsd:
- return interp__builtin_x86_cmpeq(S, OpPC, Frame, Call, BuiltinID);
-
+ case X86::BI__builtin_ia32_cmpeqpd:
+ case X86::BI__builtin_ia32_cmpgess:
case X86::BI__builtin_ia32_cmpgeps:
case X86::BI__builtin_ia32_cmpgepd:
- case X86::BI__builtin_ia32_cmpgess:
case X86::BI__builtin_ia32_cmpgesd:
- return interp__builtin_x86_cmpge(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpgtps:
- case X86::BI__builtin_ia32_cmpgtpd:
case X86::BI__builtin_ia32_cmpgtss:
+ case X86::BI__builtin_ia32_cmpgtps:
case X86::BI__builtin_ia32_cmpgtsd:
- return interp__builtin_x86_cmpgt(S, OpPC, Frame, Call, BuiltinID);
-
+ case X86::BI__builtin_ia32_cmpgtpd:
+ case X86::BI__builtin_ia32_cmpless:
case X86::BI__builtin_ia32_cmpleps:
case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpless:
case X86::BI__builtin_ia32_cmplesd:
- return interp__builtin_x86_cmple(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltpd:
case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltps:
case X86::BI__builtin_ia32_cmpltsd:
- return interp__builtin_x86_cmplt(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpltpd:
case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqps:
case X86::BI__builtin_ia32_cmpneqsd:
- return interp__builtin_x86_cmpneq(S, OpPC, Frame, Call, BuiltinID);
-
+ case X86::BI__builtin_ia32_cmpneqpd:
+ case X86::BI__builtin_ia32_cmpngess:
case X86::BI__builtin_ia32_cmpngeps:
case X86::BI__builtin_ia32_cmpngepd:
- case X86::BI__builtin_ia32_cmpngess:
case X86::BI__builtin_ia32_cmpngesd:
- return interp__builtin_x86_cmpnge(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpngtps:
- case X86::BI__builtin_ia32_cmpngtpd:
case X86::BI__builtin_ia32_cmpngtss:
+ case X86::BI__builtin_ia32_cmpngtps:
case X86::BI__builtin_ia32_cmpngtsd:
- return interp__builtin_x86_cmpngt(S, OpPC, Frame, Call, BuiltinID);
-
+ case X86::BI__builtin_ia32_cmpngtpd:
+ case X86::BI__builtin_ia32_cmpnless:
case X86::BI__builtin_ia32_cmpnleps:
case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpnless:
case X86::BI__builtin_ia32_cmpnlesd:
- return interp__builtin_x86_cmpnle(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltpd:
case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltps:
case X86::BI__builtin_ia32_cmpnltsd:
- return interp__builtin_x86_cmpnlt(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordpd:
+ case X86::BI__builtin_ia32_cmpnltpd:
case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordsd:
- return interp__builtin_x86_cmpord(S, OpPC, Frame, Call, BuiltinID);
-
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordpd:
+ case X86::BI__builtin_ia32_cmpordpd:
case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordps:
case X86::BI__builtin_ia32_cmpunordsd:
- return interp__builtin_x86_cmpunord(S, OpPC, Frame, Call, BuiltinID);
+ case X86::BI__builtin_ia32_cmpunordpd:
+ return interp__builtin_x86_cmp(S, OpPC, Frame, Call, BuiltinID);
default:
S.FFDiag(S.Current->getLocation(OpPC),
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index bb17d50e62582..c096993e116f2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14422,66 +14422,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmppd:
case X86::BI__builtin_ia32_cmpps256:
- case X86::BI__builtin_ia32_cmppd256: {
- const Expr *A = E->getArg(0);
- const Expr *B = E->getArg(1);
- const Expr *Imm = E->getArg(2);
-
- APValue AV, BV;
- APSInt ImmVal;
- if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
- return false;
- if (!EvaluateInteger(Imm, ImmVal, Info))
- return false;
-
- const auto NumLanes = AV.getVectorLength();
- if (NumLanes == 0 || BV.getVectorLength() != NumLanes)
- return false;
-
- const auto RetTy = E->getType();
- const auto *VT = RetTy->getAs<VectorType>();
- if (!VT)
- return false;
-
- const uint32_t ImmZExt = ImmVal.getZExtValue();
- const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpsd);
-
- SmallVector<APValue, 8> ResultElements;
- ResultElements.reserve(NumLanes);
- for (unsigned i = 0; i < NumLanes; ++i) {
- // Handle cmpss/cmpsd
- if (IsScalar && i > 0) {
- // Copy the upper 3 packed elements from a to the upper elements of dst
- ResultElements.push_back(AV.getVectorElt(i));
- continue;
- }
-
- const auto AElem = AV.getVectorElt(i);
- const auto BElem = BV.getVectorElt(i);
-
- const auto A0 = AElem.getFloat();
- const auto B0 = BElem.getFloat();
-
- const auto CompareResult = A0.compare(B0);
- const bool Matches = MatchesPredicate(ImmZExt, CompareResult);
-
- // Create bit patterns for comparison results:
- // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
- // False = all bits zero
- const llvm::fltSemantics &Sem = A0.getSemantics();
- const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
- const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
- const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
-
- if (Matches)
- ResultElements.push_back(APValue(True));
- else
- ResultElements.push_back(APValue(False));
- }
-
- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
- }
+ case X86::BI__builtin_ia32_cmppd256:
case X86::BI__builtin_ia32_cmpeqss:
case X86::BI__builtin_ia32_cmpeqsd:
case X86::BI__builtin_ia32_cmpeqps:
@@ -14530,104 +14471,140 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
case X86::BI__builtin_ia32_cmpunordsd:
case X86::BI__builtin_ia32_cmpunordps:
case X86::BI__builtin_ia32_cmpunordpd: {
- const Expr *A = E->getArg(0);
- const Expr *B = E->getArg(1);
-
APValue AV, BV;
- if (!EvaluateVector(A, AV, Info) || !EvaluateVector(B, BV, Info))
+ if (!EvaluateVector(E->getArg(0), AV, Info) ||
+ !EvaluateVector(E->getArg(1), BV, Info))
return false;
const auto NumLanes = AV.getVectorLength();
if (NumLanes == 0 || BV.getVectorLength() != NumLanes)
return false;
- const auto RetTy = E->getType();
- const auto *VT = RetTy->getAs<VectorType>();
- if (!VT)
- return false;
-
- const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpeqss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd);
-
- // Select comparison predicate based on builtin
- uint32_t Imm = X86CmpImm::CMP_EQ_OQ;
- if ((BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
+ const bool HasImmArg = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpps) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmppd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpps256) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmppd256);
+
+ const bool IsScalar =
+ (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpeqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
(BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgeps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgepd)) {
- Imm = X86CmpImm::CMP_GE_OS;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtpd)) {
- Imm = X86CmpImm::CMP_GT_OS;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltpd)) {
- Imm = X86CmpImm::CMP_LT_OS;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpleps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmplepd)) {
- Imm = X86CmpImm::CMP_LE_OS;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqpd)) {
- Imm = X86CmpImm::CMP_NEQ_UQ;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngeps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngepd)) {
- Imm = X86CmpImm::CMP_NGE_US;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtpd)) {
- Imm = X86CmpImm::CMP_NGT_US;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnleps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnlepd)) {
- Imm = X86CmpImm::CMP_NLE_US;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltpd)) {
- Imm = X86CmpImm::CMP_NLT_US;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordpd)) {
- Imm = X86CmpImm::CMP_ORD_Q;
- } else if ((BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordps) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordpd)) {
- Imm = X86CmpImm::CMP_UNORD_Q;
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd);
+
+ uint32_t Predicate = X86CmpImm::CMP_EQ_OQ;
+ if (HasImmArg) {
+ APSInt ImmVal;
+ if (!EvaluateInteger(E->getArg(2), ImmVal, Info))
+ return false;
+ Predicate = ImmVal.getZExtValue();
+ }
+
+ switch (BuiltinOp) {
+ case X86::BI__builtin_ia32_cmpss:
+ case X86::BI__builtin_ia32_cmpsd:
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd256:
+ break;
+ case X86::BI__builtin_ia32_cmpeqss:
+ case X86::BI__builtin_ia32_cmpeqsd:
+ case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ Predicate = X86CmpImm::CMP_EQ_OQ;
+ break;
+ case X86::BI__builtin_ia32_cmpgess:
+ case X86::BI__builtin_ia32_cmpgesd:
+ case X86::BI__builtin_ia32_cmpgeps:
+ case X86::BI__builtin_ia32_cmpgepd:
+ Predicate = X86CmpImm::CMP_GE_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpgtss:
+ case X86::BI__builtin_ia32_cmpgtsd:
+ case X86::BI__builtin_ia32_cmpgtps:
+ case X86::BI__builtin_ia32_cmpgtpd:
+ Predicate = X86CmpImm::CMP_GT_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpltss:
+ case X86::BI__builtin_ia32_cmpltsd:
+ case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ Predicate = X86CmpImm::CMP_LT_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpless:
+ case X86::BI__builtin_ia32_cmplesd:
+ case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ Predicate = X86CmpImm::CMP_LE_OS;
+ break;
+ case X86::BI__builtin_ia32_cmpneqss:
+ case X86::BI__builtin_ia32_cmpneqsd:
+ case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ Predicate = X86CmpImm::CMP_NEQ_UQ;
+ break;
+ case X86::BI__builtin_ia32_cmpngess:
+ case X86::BI__builtin_ia32_cmpngesd:
+ case X86::BI__builtin_ia32_cmpngeps:
+ case X86::BI__builtin_ia32_cmpngepd:
+ Predicate = X86CmpImm::CMP_NGE_US;
+ break;
+ case X86::BI__builtin_ia32_cmpngtss:
+ case X86::BI__builtin_ia32_cmpngtsd:
+ case X86::BI__builtin_ia32_cmpngtps:
+ case X86::BI__builtin_ia32_cmpngtpd:
+ Predicate = X86CmpImm::CMP_NGT_US;
+ break;
+ case X86::BI__builtin_ia32_cmpnless:
+ case X86::BI__builtin_ia32_cmpnlesd:
+ case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ Predicate = X86CmpImm::CMP_NLE_US;
+ break;
+ case X86::BI__builtin_ia32_cmpnltss:
+ case X86::BI__builtin_ia32_cmpnltsd:
+ case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ Predicate = X86CmpImm::CMP_NLT_US;
+ break;
+ case X86::BI__builtin_ia32_cmpordss:
+ case X86::BI__builtin_ia32_cmpordsd:
+ case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ Predicate = X86CmpImm::CMP_ORD_Q;
+ break;
+ case X86::BI__builtin_ia32_cmpunordss:
+ case X86::BI__builtin_ia32_cmpunordsd:
+ case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ Predicate = X86CmpImm::CMP_UNORD_Q;
+ break;
+ default:
+ llvm_unreachable("unhandled x86 cmp builtin");
}
SmallVector<APValue, 8> ResultElements;
@@ -14647,7 +14624,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const auto B0 = BElem.getFloat();
const auto CompareResult = A0.compare(B0);
- const bool Matches = MatchesPredicate(Imm, CompareResult);
+ const bool Matches = MatchesPredicate(Predicate, CompareResult);
// Create bit patterns for comparison results:
// True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
@@ -17460,6 +17437,95 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return ((A & B) != 0) && ((~A & B) != 0);
});
}
+
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ case X86::BI__builtin_ia32_ucomisdle:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ case X86::BI__builtin_ia32_ucomisdge:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ case X86::BI__builtin_ia32_vcomish: {
+ APValue AV, BV;
+ if (!EvaluateVector(E->getArg(0), AV, Info) ||
+ !EvaluateVector(E->getArg(1), BV, Info))
+ return false;
+
+ if (AV.getVectorLength() == 0 || BV.getVectorLength() != AV.getVectorLength())
+ return false;
+
+ uint32_t Predicate;
+ switch (BuiltinOp) {
+ case X86::BI__builtin_ia32_comieq:
+ case X86::BI__builtin_ia32_ucomieq:
+ case X86::BI__builtin_ia32_comisdeq:
+ case X86::BI__builtin_ia32_ucomisdeq:
+ Predicate = X86CmpImm::CMP_EQ_OQ;
+ break;
+ case X86::BI__builtin_ia32_comilt:
+ case X86::BI__builtin_ia32_ucomilt:
+ case X86::BI__builtin_ia32_comisdlt:
+ case X86::BI__builtin_ia32_ucomisdlt:
+ Predicate = X86CmpImm::CMP_LT_OQ;
+ break;
+ case X86::BI__builtin_ia32_comile:
+ case X86::BI__builtin_ia32_ucomile:
+ case X86::BI__builtin_ia32_comisdle:
+ case X86::BI__builtin_ia32_ucomisdle:
+ Predicate = X86CmpImm::CMP_LE_OQ;
+ break;
+ case X86::BI__builtin_ia32_comigt:
+ case X86::BI__builtin_ia32_ucomigt:
+ case X86::BI__builtin_ia32_comisdgt:
+ case X86::BI__builtin_ia32_ucomisdgt:
+ Predicate = X86CmpImm::CMP_GT_OQ;
+ break;
+ case X86::BI__builtin_ia32_comige:
+ case X86::BI__builtin_ia32_ucomige:
+ case X86::BI__builtin_ia32_comisdge:
+ case X86::BI__builtin_ia32_ucomisdge:
+ Predicate = X86CmpImm::CMP_GE_OQ;
+ break;
+ case X86::BI__builtin_ia32_comineq:
+ case X86::BI__builtin_ia32_ucomineq:
+ case X86::BI__builtin_ia32_comisdneq:
+ case X86::BI__builtin_ia32_ucomisdneq:
+ Predicate = X86CmpImm::CMP_NEQ_UQ;
+ break;
+ case X86::BI__builtin_ia32_vcomish: {
+ APSInt Imm;
+ if (!EvaluateInteger(E->getArg(2), Imm, Info))
+ return false;
+ Predicate = Imm.getZExtValue();
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled x86 comi builtin");
+ }
+
+ const APFloat A = AV.getVectorElt(0).getFloat();
+ const APFloat B = BV.getVectorElt(0).getFloat();
+ const bool Matches = MatchesPredicate(Predicate, A.compare(B));
+ return Success(Info.Ctx.MakeIntValue(Matches, E->getType()), E);
+ }
+
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 9a1d1930f66b6..dbf83e8ae2704 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -299,74 +299,74 @@ _mm512_zextph256_ph512(__m256h __a) {
#define _mm_comi_sh(A, B, pred) \
_mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comieq_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comilt_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comile_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comigt_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comige_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comineq_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomieq_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomilt_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomile_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomigt_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomige_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomineq_sh(__m128h __A,
+ __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
_MM_FROUND_CUR_DIRECTION);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index c29c7d830de18..935849a372722 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -998,8 +998,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_sd(__m128d __a
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comieq_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
}
@@ -1022,8 +1022,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comilt_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
}
@@ -1046,8 +1046,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comile_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
}
@@ -1070,8 +1070,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comigt_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
}
@@ -1094,8 +1094,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comige_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
}
@@ -1118,8 +1118,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comineq_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
}
@@ -1140,8 +1140,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomieq_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
}
@@ -1164,8 +1164,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomilt_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
}
@@ -1188,8 +1188,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomile_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
}
@@ -1212,8 +1212,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomigt_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
}
@@ -1236,8 +1236,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomige_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
}
@@ -1260,8 +1260,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomineq_sd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index ae595291f888a..7b14c868241d3 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1095,7 +1095,7 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comieq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
@@ -1120,7 +1120,7 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comilt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
@@ -1144,7 +1144,7 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comile_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
@@ -1168,7 +1168,7 @@ _mm_comile_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comigt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
@@ -1192,7 +1192,7 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comige_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
@@ -1216,7 +1216,7 @@ _mm_comige_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_comineq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
@@ -1239,7 +1239,7 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomieq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
@@ -1263,7 +1263,7 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomilt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
@@ -1287,7 +1287,7 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomile_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
@@ -1311,7 +1311,7 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomigt_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
@@ -1335,7 +1335,7 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomige_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
@@ -1358,7 +1358,7 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_ucomineq_ss(__m128 __a, __m128 __b)
{
return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c
index dbd24d0899c60..51b01329d1b85 100644
--- a/clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -421,89 +421,162 @@ __m512h test_mm512_zextph256_ph512(__m256h __a) {
}
TEST_CONSTEXPR(match_m512h(_mm512_zextph256_ph512((__m256h){1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16}), 1.0f16, 2.0f16, 3.0f16, 4.0f16, 5.0f16, 6.0f16, 7.0f16, 8.0f16, 9.0f16, 10.0f16, 11.0f16, 12.0f16, 13.0f16, 14.0f16, 15.0f16, 16.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16));
-int test_mm_comi_round_sh(__m128h __A, __m128h __B) {
- // CHECK-LABEL: test_mm_comi_round_sh
- // CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 0, i32 8)
- return _mm_comi_round_sh(__A, __B, 0, _MM_FROUND_NO_EXC);
-}
-
int test_mm_comi_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comi_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 0, i32 4)
return _mm_comi_sh(__A, __B, 0);
}
+#define HNAN ((_Float16)__builtin_nanf(""))
+#define COMI_SH_CASE(FN, A0, B0, EXPECT) \
+ TEST_CONSTEXPR(FN((__m128h){A0, +0.0f16, +0.0f16, +0.0f16, +0.0f16, +0.0f16, \
+ +0.0f16, +0.0f16}, \
+ (__m128h){B0, +1.0f16, +1.0f16, +1.0f16, +1.0f16, +1.0f16, \
+ +1.0f16, +1.0f16}) == (EXPECT));
+#define COMI_SH_A(X) \
+ ((__m128h){X, +0.0f16, +0.0f16, +0.0f16, +0.0f16, +0.0f16, +0.0f16, +0.0f16})
+#define COMI_SH_B(X) \
+ ((__m128h){X, +1.0f16, +1.0f16, +1.0f16, +1.0f16, +1.0f16, +1.0f16, +1.0f16})
+#define COMI_SH_PRED_CASE(A0, B0, PRED, EXPECT) \
+ TEST_CONSTEXPR(_mm_comi_sh(COMI_SH_A(A0), COMI_SH_B(B0), PRED) == (EXPECT));
+
+COMI_SH_PRED_CASE(+2.0f16, +2.0f16, _CMP_EQ_OQ, 1)
+COMI_SH_PRED_CASE(+1.0f16, +2.0f16, _CMP_EQ_OQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_EQ_OQ, 0)
+
+COMI_SH_PRED_CASE(+1.0f16, +2.0f16, _CMP_LT_OQ, 1)
+COMI_SH_PRED_CASE(+2.0f16, +2.0f16, _CMP_LT_OQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_LT_OQ, 0)
+
+COMI_SH_PRED_CASE(+2.0f16, +2.0f16, _CMP_LE_OQ, 1)
+COMI_SH_PRED_CASE(+3.0f16, +2.0f16, _CMP_LE_OQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_LE_OQ, 0)
+
+COMI_SH_PRED_CASE(+3.0f16, +2.0f16, _CMP_GE_OQ, 1)
+COMI_SH_PRED_CASE(+1.0f16, +2.0f16, _CMP_GE_OQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_GE_OQ, 0)
+
+COMI_SH_PRED_CASE(+3.0f16, +2.0f16, _CMP_GT_OQ, 1)
+COMI_SH_PRED_CASE(+2.0f16, +2.0f16, _CMP_GT_OQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_GT_OQ, 0)
+
+COMI_SH_PRED_CASE(+1.0f16, +2.0f16, _CMP_NEQ_UQ, 1)
+COMI_SH_PRED_CASE(+2.0f16, +2.0f16, _CMP_NEQ_UQ, 0)
+COMI_SH_PRED_CASE(HNAN, +2.0f16, _CMP_NEQ_UQ, 1)
+
int test_mm_comieq_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comieq_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 16, i32 4)
return _mm_comieq_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comieq_sh, +2.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comieq_sh, +1.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comieq_sh, HNAN, +2.0f16, 0)
int test_mm_comilt_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comilt_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 1, i32 4)
return _mm_comilt_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comilt_sh, +1.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comilt_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comilt_sh, HNAN, +2.0f16, 0)
int test_mm_comile_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comile_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 2, i32 4)
return _mm_comile_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comile_sh, +2.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comile_sh, +3.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comile_sh, HNAN, +2.0f16, 0)
int test_mm_comigt_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comigt_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 14, i32 4)
return _mm_comigt_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comigt_sh, +3.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comigt_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comigt_sh, HNAN, +2.0f16, 0)
int test_mm_comige_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comige_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 13, i32 4)
return _mm_comige_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comige_sh, +3.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comige_sh, +1.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comige_sh, HNAN, +2.0f16, 0)
int test_mm_comineq_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_comineq_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 20, i32 4)
return _mm_comineq_sh(__A, __B);
}
+COMI_SH_CASE(_mm_comineq_sh, +1.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_comineq_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_comineq_sh, HNAN, +2.0f16, 1)
int test_mm_ucomieq_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomieq_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 0, i32 4)
return _mm_ucomieq_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomieq_sh, +2.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomieq_sh, +1.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomieq_sh, HNAN, +2.0f16, 0)
int test_mm_ucomilt_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomilt_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 17, i32 4)
return _mm_ucomilt_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomilt_sh, +1.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomilt_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomilt_sh, HNAN, +2.0f16, 0)
int test_mm_ucomile_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomile_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 18, i32 4)
return _mm_ucomile_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomile_sh, +2.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomile_sh, +3.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomile_sh, HNAN, +2.0f16, 0)
int test_mm_ucomigt_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomigt_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 30, i32 4)
return _mm_ucomigt_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomigt_sh, +3.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomigt_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomigt_sh, HNAN, +2.0f16, 0)
int test_mm_ucomige_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomige_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 29, i32 4)
return _mm_ucomige_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomige_sh, +3.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomige_sh, +1.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomige_sh, HNAN, +2.0f16, 0)
int test_mm_ucomineq_sh(__m128h __A, __m128h __B) {
// CHECK-LABEL: test_mm_ucomineq_sh
// CHECK: @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %{{.}}, <8 x half> %{{.}}, i32 4, i32 4)
return _mm_ucomineq_sh(__A, __B);
}
+COMI_SH_CASE(_mm_ucomineq_sh, +1.0f16, +2.0f16, 1)
+COMI_SH_CASE(_mm_ucomineq_sh, +2.0f16, +2.0f16, 0)
+COMI_SH_CASE(_mm_ucomineq_sh, HNAN, +2.0f16, 1)
+
+#undef COMI_SH_CASE
+#undef COMI_SH_PRED_CASE
+#undef COMI_SH_B
+#undef COMI_SH_A
+#undef HNAN
__m512h test_mm512_add_ph(__m512h __A, __m512h __B) {
// CHECK-LABEL: test_mm512_add_ph
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index baea2e8718f18..abf6b3fbc87ab 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -477,41 +477,63 @@ __m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
TEST_CONSTEXPR(match_m128(_mm_cmpunord_ss((__m128){__builtin_nanf(""), +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), ALL_ONES_F, +2.0f, +3.0f, +4.0f));
TEST_CONSTEXPR(match_m128(_mm_cmpunord_ss((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f, +9.0f, +8.0f, +7.0f}), +0.0f, +2.0f, +3.0f, +4.0f));
+#define COMI_SS_CASE(FN, A0, B0, EXPECT) \
+ TEST_CONSTEXPR(FN((__m128){A0, +0.0f, +0.0f, +0.0f}, \
+ (__m128){B0, +1.0f, +1.0f, +1.0f}) == (EXPECT));
+
int test_mm_comieq_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comieq_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comieq_ss(A, B);
}
+COMI_SS_CASE(_mm_comieq_ss, +2.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comieq_ss, +1.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comieq_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_comige_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comige_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comige_ss(A, B);
}
+COMI_SS_CASE(_mm_comige_ss, +3.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comige_ss, +1.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comige_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_comigt_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comigt_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comigt_ss(A, B);
}
+COMI_SS_CASE(_mm_comigt_ss, +3.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comigt_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comigt_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_comile_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comile_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comile_ss(A, B);
}
+COMI_SS_CASE(_mm_comile_ss, +2.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comile_ss, +3.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comile_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_comilt_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comilt_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comilt_ss(A, B);
}
+COMI_SS_CASE(_mm_comilt_ss, +1.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comilt_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comilt_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_comineq_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_comineq_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_comineq_ss(A, B);
}
+COMI_SS_CASE(_mm_comineq_ss, +1.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_comineq_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_comineq_ss, __builtin_nanf(""), +2.0f, 1)
int test_mm_cvt_ss2si(__m128 A) {
// CHECK-LABEL: test_mm_cvt_ss2si
@@ -1058,36 +1080,56 @@ int test_mm_ucomieq_ss(__m128 A, __m128 B) {
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomieq_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomieq_ss, +2.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomieq_ss, +1.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomieq_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_ucomige_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_ucomige_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomige_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomige_ss, +3.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomige_ss, +1.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomige_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_ucomigt_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_ucomigt_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomigt_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomigt_ss, +3.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomigt_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomigt_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_ucomile_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_ucomile_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomile_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomile_ss, +2.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomile_ss, +3.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomile_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_ucomilt_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_ucomilt_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomilt_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomilt_ss, +1.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomilt_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomilt_ss, __builtin_nanf(""), +2.0f, 0)
int test_mm_ucomineq_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_ucomineq_ss
// CHECK: call {{.*}}i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_ucomineq_ss(A, B);
}
+COMI_SS_CASE(_mm_ucomineq_ss, +1.0f, +2.0f, 1)
+COMI_SS_CASE(_mm_ucomineq_ss, +2.0f, +2.0f, 0)
+COMI_SS_CASE(_mm_ucomineq_ss, __builtin_nanf(""), +2.0f, 1)
+
+#undef COMI_SS_CASE
__m128 test_mm_undefined_ps(void) {
// CHECK-LABEL: test_mm_undefined_ps
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ab65cb39a7c2b..5c0ff1e2d8d4e 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -663,41 +663,62 @@ __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
}
TEST_CONSTEXPR(match_m128d(_mm_cmpunord_sd((__m128d){__builtin_nan(""), +2.0}, (__m128d){+5.0, +9.0}), ALL_ONES_D, +2.0));
+#define COMI_SD_CASE(FN, A0, B0, EXPECT) \
+ TEST_CONSTEXPR(FN((__m128d){A0, +0.0}, (__m128d){B0, +1.0}) == (EXPECT));
+
int test_mm_comieq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comieq_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comieq_sd(A, B);
}
+COMI_SD_CASE(_mm_comieq_sd, +2.0, +2.0, 1)
+COMI_SD_CASE(_mm_comieq_sd, +1.0, +2.0, 0)
+COMI_SD_CASE(_mm_comieq_sd, __builtin_nan(""), +2.0, 0)
int test_mm_comige_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comige_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comige_sd(A, B);
}
+COMI_SD_CASE(_mm_comige_sd, +3.0, +2.0, 1)
+COMI_SD_CASE(_mm_comige_sd, +1.0, +2.0, 0)
+COMI_SD_CASE(_mm_comige_sd, __builtin_nan(""), +2.0, 0)
int test_mm_comigt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comigt_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comigt_sd(A, B);
}
+COMI_SD_CASE(_mm_comigt_sd, +3.0, +2.0, 1)
+COMI_SD_CASE(_mm_comigt_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_comigt_sd, __builtin_nan(""), +2.0, 0)
int test_mm_comile_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comile_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comile_sd(A, B);
}
+COMI_SD_CASE(_mm_comile_sd, +2.0, +2.0, 1)
+COMI_SD_CASE(_mm_comile_sd, +3.0, +2.0, 0)
+COMI_SD_CASE(_mm_comile_sd, __builtin_nan(""), +2.0, 0)
int test_mm_comilt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comilt_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comilt_sd(A, B);
}
+COMI_SD_CASE(_mm_comilt_sd, +1.0, +2.0, 1)
+COMI_SD_CASE(_mm_comilt_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_comilt_sd, __builtin_nan(""), +2.0, 0)
int test_mm_comineq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_comineq_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_comineq_sd(A, B);
}
+COMI_SD_CASE(_mm_comineq_sd, +1.0, +2.0, 1)
+COMI_SD_CASE(_mm_comineq_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_comineq_sd, __builtin_nan(""), +2.0, 1)
__m128d test_mm_cvtepi32_pd(__m128i A) {
// CHECK-LABEL: test_mm_cvtepi32_pd
@@ -2019,36 +2040,56 @@ int test_mm_ucomieq_sd(__m128d A, __m128d B) {
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomieq_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomieq_sd, +2.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomieq_sd, +1.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomieq_sd, __builtin_nan(""), +2.0, 0)
int test_mm_ucomige_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_ucomige_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomige_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomige_sd, +3.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomige_sd, +1.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomige_sd, __builtin_nan(""), +2.0, 0)
int test_mm_ucomigt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_ucomigt_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomigt_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomigt_sd, +3.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomigt_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomigt_sd, __builtin_nan(""), +2.0, 0)
int test_mm_ucomile_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_ucomile_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomile_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomile_sd, +2.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomile_sd, +3.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomile_sd, __builtin_nan(""), +2.0, 0)
int test_mm_ucomilt_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_ucomilt_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomilt_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomilt_sd, +1.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomilt_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomilt_sd, __builtin_nan(""), +2.0, 0)
int test_mm_ucomineq_sd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_ucomineq_sd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_ucomineq_sd(A, B);
}
+COMI_SD_CASE(_mm_ucomineq_sd, +1.0, +2.0, 1)
+COMI_SD_CASE(_mm_ucomineq_sd, +2.0, +2.0, 0)
+COMI_SD_CASE(_mm_ucomineq_sd, __builtin_nan(""), +2.0, 1)
+
+#undef COMI_SD_CASE
__m128d test_mm_undefined_pd(void) {
// X64-LABEL: test_mm_undefined_pd
>From 43905074592cdfa4feafb33406b5e41b35aa18de Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 03:39:22 +0700
Subject: [PATCH 09/14] clang-format
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 62 ++++++++++++------------
clang/lib/AST/ExprConstant.cpp | 56 ++++++++++-----------
clang/lib/Headers/avx512fp16intrin.h | 48 +++++++++---------
clang/lib/Headers/emmintrin.h | 4 +-
clang/lib/Headers/xmmintrin.h | 60 +++++++++--------------
5 files changed, 108 insertions(+), 122 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ae8ca192687f6..7073125ca38ac 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4173,7 +4173,8 @@ static bool interp__builtin_x86_comi(InterpState &S, CodePtr OpPC,
const Pointer &VectorB = S.Stk.pop<Pointer>();
const Pointer &VectorA = S.Stk.pop<Pointer>();
- if (VectorA.getNumElems() == 0 || VectorA.getNumElems() != VectorB.getNumElems())
+ if (VectorA.getNumElems() == 0 ||
+ VectorA.getNumElems() != VectorB.getNumElems())
return false;
const llvm::APFloat A = VectorA.elem<Floating>(0).getAPFloat();
@@ -4187,12 +4188,10 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call, unsigned ID) {
const bool HasImmArg =
- ID == X86::BI__builtin_ia32_cmpps ||
- ID == X86::BI__builtin_ia32_cmppd ||
+ ID == X86::BI__builtin_ia32_cmpps || ID == X86::BI__builtin_ia32_cmppd ||
ID == X86::BI__builtin_ia32_cmpps256 ||
ID == X86::BI__builtin_ia32_cmppd256 ||
- ID == X86::BI__builtin_ia32_cmpss ||
- ID == X86::BI__builtin_ia32_cmpsd;
+ ID == X86::BI__builtin_ia32_cmpss || ID == X86::BI__builtin_ia32_cmpsd;
uint32_t Predicate;
if (HasImmArg) {
@@ -4281,33 +4280,32 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const Pointer &VectorA = S.Stk.pop<Pointer>();
Pointer &Dst = S.Stk.peek<Pointer>();
- const bool IsScalar =
- ID == X86::BI__builtin_ia32_cmpss ||
- ID == X86::BI__builtin_ia32_cmpsd ||
- ID == X86::BI__builtin_ia32_cmpeqss ||
- ID == X86::BI__builtin_ia32_cmpeqsd ||
- ID == X86::BI__builtin_ia32_cmpgess ||
- ID == X86::BI__builtin_ia32_cmpgesd ||
- ID == X86::BI__builtin_ia32_cmpgtss ||
- ID == X86::BI__builtin_ia32_cmpgtsd ||
- ID == X86::BI__builtin_ia32_cmpltss ||
- ID == X86::BI__builtin_ia32_cmpltsd ||
- ID == X86::BI__builtin_ia32_cmpless ||
- ID == X86::BI__builtin_ia32_cmplesd ||
- ID == X86::BI__builtin_ia32_cmpneqss ||
- ID == X86::BI__builtin_ia32_cmpneqsd ||
- ID == X86::BI__builtin_ia32_cmpngess ||
- ID == X86::BI__builtin_ia32_cmpngesd ||
- ID == X86::BI__builtin_ia32_cmpngtss ||
- ID == X86::BI__builtin_ia32_cmpngtsd ||
- ID == X86::BI__builtin_ia32_cmpnless ||
- ID == X86::BI__builtin_ia32_cmpnlesd ||
- ID == X86::BI__builtin_ia32_cmpnltss ||
- ID == X86::BI__builtin_ia32_cmpnltsd ||
- ID == X86::BI__builtin_ia32_cmpordss ||
- ID == X86::BI__builtin_ia32_cmpordsd ||
- ID == X86::BI__builtin_ia32_cmpunordss ||
- ID == X86::BI__builtin_ia32_cmpunordsd;
+ const bool IsScalar = ID == X86::BI__builtin_ia32_cmpss ||
+ ID == X86::BI__builtin_ia32_cmpsd ||
+ ID == X86::BI__builtin_ia32_cmpeqss ||
+ ID == X86::BI__builtin_ia32_cmpeqsd ||
+ ID == X86::BI__builtin_ia32_cmpgess ||
+ ID == X86::BI__builtin_ia32_cmpgesd ||
+ ID == X86::BI__builtin_ia32_cmpgtss ||
+ ID == X86::BI__builtin_ia32_cmpgtsd ||
+ ID == X86::BI__builtin_ia32_cmpltss ||
+ ID == X86::BI__builtin_ia32_cmpltsd ||
+ ID == X86::BI__builtin_ia32_cmpless ||
+ ID == X86::BI__builtin_ia32_cmplesd ||
+ ID == X86::BI__builtin_ia32_cmpneqss ||
+ ID == X86::BI__builtin_ia32_cmpneqsd ||
+ ID == X86::BI__builtin_ia32_cmpngess ||
+ ID == X86::BI__builtin_ia32_cmpngesd ||
+ ID == X86::BI__builtin_ia32_cmpngtss ||
+ ID == X86::BI__builtin_ia32_cmpngtsd ||
+ ID == X86::BI__builtin_ia32_cmpnless ||
+ ID == X86::BI__builtin_ia32_cmpnlesd ||
+ ID == X86::BI__builtin_ia32_cmpnltss ||
+ ID == X86::BI__builtin_ia32_cmpnltsd ||
+ ID == X86::BI__builtin_ia32_cmpordss ||
+ ID == X86::BI__builtin_ia32_cmpordsd ||
+ ID == X86::BI__builtin_ia32_cmpunordss ||
+ ID == X86::BI__builtin_ia32_cmpunordsd;
const auto NumLanes = VectorA.getNumElems();
if (NumLanes != VectorB.getNumElems())
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index c096993e116f2..469b688b66627 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14487,33 +14487,32 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
(BuiltinOp == X86::BI__builtin_ia32_cmpps256) ||
(BuiltinOp == X86::BI__builtin_ia32_cmppd256);
- const bool IsScalar =
- (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpeqss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
- (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd);
+ const bool IsScalar = (BuiltinOp == X86::BI__builtin_ia32_cmpss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpeqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpeqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpgtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmplesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpneqsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngess) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpngtsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnless) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnlesd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpnltsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpordsd) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordss) ||
+ (BuiltinOp == X86::BI__builtin_ia32_cmpunordsd);
uint32_t Predicate = X86CmpImm::CMP_EQ_OQ;
if (HasImmArg) {
@@ -17468,7 +17467,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
!EvaluateVector(E->getArg(1), BV, Info))
return false;
- if (AV.getVectorLength() == 0 || BV.getVectorLength() != AV.getVectorLength())
+ if (AV.getVectorLength() == 0 ||
+ BV.getVectorLength() != AV.getVectorLength())
return false;
uint32_t Predicate;
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index dbf83e8ae2704..0b7b6b53a25c7 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -299,74 +299,74 @@ _mm512_zextph256_ph512(__m256h __a) {
#define _mm_comi_sh(A, B, pred) \
_mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int
+ __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comieq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comilt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comile_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comigt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comige_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_comineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_comineq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomieq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomieq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomilt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomilt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomile_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomile_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomigt_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomigt_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomige_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomige_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
_MM_FROUND_CUR_DIRECTION);
}
-static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_ucomineq_sh(__m128h __A,
- __m128h __B) {
+static __inline__ int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_ucomineq_sh(__m128h __A, __m128h __B) {
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
_MM_FROUND_CUR_DIRECTION);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 935849a372722..2f0d28d0c495d 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1260,8 +1260,8 @@ static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomige_sd(__m128d __a,
/// A 128-bit vector of [2 x double]. The lower double-precision value is
/// compared to the lower double-precision value of \a __a.
/// \returns An integer containing the comparison result.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomineq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_ucomineq_sd(__m128d __a, __m128d __b) {
return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 7b14c868241d3..b0294b8f45619 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1095,9 +1095,8 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comieq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comieq_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
}
@@ -1120,9 +1119,8 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comilt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comilt_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
}
@@ -1144,9 +1142,8 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comile_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comile_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
}
@@ -1168,9 +1165,8 @@ _mm_comile_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comigt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comigt_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
}
@@ -1192,9 +1188,8 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comige_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comige_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
}
@@ -1216,9 +1211,8 @@ _mm_comige_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_comineq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_comineq_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
}
@@ -1239,9 +1233,8 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomieq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomieq_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
}
@@ -1263,9 +1256,8 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomilt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomilt_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
}
@@ -1287,9 +1279,8 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomile_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomile_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
}
@@ -1311,9 +1302,8 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomigt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomigt_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
}
@@ -1335,9 +1325,8 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomige_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomige_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
}
@@ -1358,9 +1347,8 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the comparison.
/// \returns An integer containing the comparison results.
-static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_ucomineq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_ucomineq_ss(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
}
>From db1ce17f3d823256b737545c76e86abbe8235506 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 03:44:46 +0700
Subject: [PATCH 10/14] clang-format
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ExprConstShared.h | 76 ++++++++++++++------------
clang/lib/AST/ExprConstant.cpp | 5 +-
clang/lib/Headers/emmintrin.h | 96 ++++++++++++++++-----------------
clang/lib/Headers/xmmintrin.h | 92 ++++++++++++-------------------
4 files changed, 125 insertions(+), 144 deletions(-)
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index c33d3a3fd684c..a160f31e2d06c 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -40,44 +40,50 @@ struct FPCompareFlags {
// SSE/AVX floating-point comparison immediates
namespace X86CmpImm {
- constexpr uint32_t CMP_EQ_OQ = 0x00; // Equal (ordered, quiet)
- constexpr uint32_t CMP_LT_OS = 0x01; // Less than (ordered, signaling)
- constexpr uint32_t CMP_LE_OS = 0x02; // Less than or equal (ordered, signaling)
- constexpr uint32_t CMP_UNORD_Q = 0x03; // Unordered (quiet)
- constexpr uint32_t CMP_NEQ_UQ = 0x04; // Not equal (unordered, quiet)
- constexpr uint32_t CMP_NLT_US = 0x05; // Not less than (unordered, signaling)
- constexpr uint32_t CMP_NLE_US = 0x06; // Not less than or equal (unordered, signaling)
- constexpr uint32_t CMP_ORD_Q = 0x07; // Ordered (quiet)
- constexpr uint32_t CMP_EQ_UQ = 0x08; // Equal (unordered, quiet)
- constexpr uint32_t CMP_NGE_US = 0x09; // Not greater than or equal (unordered, signaling)
- constexpr uint32_t CMP_NGT_US = 0x0A; // Not greater than (unordered, signaling)
- constexpr uint32_t CMP_FALSE_OQ = 0x0B; // False (ordered, quiet)
- constexpr uint32_t CMP_NEQ_OQ = 0x0C; // Not equal (ordered, quiet)
- constexpr uint32_t CMP_GE_OS = 0x0D; // Greater than or equal (ordered, signaling)
- constexpr uint32_t CMP_GT_OS = 0x0E; // Greater than (ordered, signaling)
- constexpr uint32_t CMP_TRUE_UQ = 0x0F; // True (unordered, quiet)
-
- // Signaling variants (0x10-0x1F)
- constexpr uint32_t CMP_EQ_OS = 0x10; // Equal (ordered, signaling)
- constexpr uint32_t CMP_LT_OQ = 0x11; // Less than (ordered, quiet)
- constexpr uint32_t CMP_LE_OQ = 0x12; // Less than or equal (ordered, quiet)
- constexpr uint32_t CMP_UNORD_S = 0x13; // Unordered (signaling)
- constexpr uint32_t CMP_NEQ_US = 0x14; // Not equal (unordered, signaling)
- constexpr uint32_t CMP_NLT_UQ = 0x15; // Not less than (unordered, quiet)
- constexpr uint32_t CMP_NLE_UQ = 0x16; // Not less than or equal (unordered, quiet)
- constexpr uint32_t CMP_ORD_S = 0x17; // Ordered (signaling)
- constexpr uint32_t CMP_EQ_US = 0x18; // Equal (unordered, signaling)
- constexpr uint32_t CMP_NGE_UQ = 0x19; // Not greater than or equal (unordered, quiet)
- constexpr uint32_t CMP_NGT_UQ = 0x1A; // Not greater than (unordered, quiet)
- constexpr uint32_t CMP_FALSE_OS = 0x1B; // False (ordered, signaling)
- constexpr uint32_t CMP_NEQ_OS = 0x1C; // Not equal (ordered, signaling)
- constexpr uint32_t CMP_GE_OQ = 0x1D; // Greater than or equal (ordered, quiet)
- constexpr uint32_t CMP_GT_OQ = 0x1E; // Greater than (ordered, quiet)
- constexpr uint32_t CMP_TRUE_US = 0x1F; // True (unordered, signaling)
+constexpr uint32_t CMP_EQ_OQ = 0x00; // Equal (ordered, quiet)
+constexpr uint32_t CMP_LT_OS = 0x01; // Less than (ordered, signaling)
+constexpr uint32_t CMP_LE_OS = 0x02; // Less than or equal (ordered, signaling)
+constexpr uint32_t CMP_UNORD_Q = 0x03; // Unordered (quiet)
+constexpr uint32_t CMP_NEQ_UQ = 0x04; // Not equal (unordered, quiet)
+constexpr uint32_t CMP_NLT_US = 0x05; // Not less than (unordered, signaling)
+constexpr uint32_t CMP_NLE_US =
+ 0x06; // Not less than or equal (unordered, signaling)
+constexpr uint32_t CMP_ORD_Q = 0x07; // Ordered (quiet)
+constexpr uint32_t CMP_EQ_UQ = 0x08; // Equal (unordered, quiet)
+constexpr uint32_t CMP_NGE_US =
+ 0x09; // Not greater than or equal (unordered, signaling)
+constexpr uint32_t CMP_NGT_US = 0x0A; // Not greater than (unordered, signaling)
+constexpr uint32_t CMP_FALSE_OQ = 0x0B; // False (ordered, quiet)
+constexpr uint32_t CMP_NEQ_OQ = 0x0C; // Not equal (ordered, quiet)
+constexpr uint32_t CMP_GE_OS =
+ 0x0D; // Greater than or equal (ordered, signaling)
+constexpr uint32_t CMP_GT_OS = 0x0E; // Greater than (ordered, signaling)
+constexpr uint32_t CMP_TRUE_UQ = 0x0F; // True (unordered, quiet)
+
+// Signaling variants (0x10-0x1F)
+constexpr uint32_t CMP_EQ_OS = 0x10; // Equal (ordered, signaling)
+constexpr uint32_t CMP_LT_OQ = 0x11; // Less than (ordered, quiet)
+constexpr uint32_t CMP_LE_OQ = 0x12; // Less than or equal (ordered, quiet)
+constexpr uint32_t CMP_UNORD_S = 0x13; // Unordered (signaling)
+constexpr uint32_t CMP_NEQ_US = 0x14; // Not equal (unordered, signaling)
+constexpr uint32_t CMP_NLT_UQ = 0x15; // Not less than (unordered, quiet)
+constexpr uint32_t CMP_NLE_UQ =
+ 0x16; // Not less than or equal (unordered, quiet)
+constexpr uint32_t CMP_ORD_S = 0x17; // Ordered (signaling)
+constexpr uint32_t CMP_EQ_US = 0x18; // Equal (unordered, signaling)
+constexpr uint32_t CMP_NGE_UQ =
+ 0x19; // Not greater than or equal (unordered, quiet)
+constexpr uint32_t CMP_NGT_UQ = 0x1A; // Not greater than (unordered, quiet)
+constexpr uint32_t CMP_FALSE_OS = 0x1B; // False (ordered, signaling)
+constexpr uint32_t CMP_NEQ_OS = 0x1C; // Not equal (ordered, signaling)
+constexpr uint32_t CMP_GE_OQ = 0x1D; // Greater than or equal (ordered, quiet)
+constexpr uint32_t CMP_GT_OQ = 0x1E; // Greater than (ordered, quiet)
+constexpr uint32_t CMP_TRUE_US = 0x1F; // True (unordered, signaling)
} // namespace X86CmpImm
// Return true if immediate and the comparison flags are matching
-static bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult) {
+static bool MatchesPredicate(const uint32_t Imm,
+ const llvm::APFloatBase::cmpResult CompareResult) {
using CmpResult = llvm::APFloatBase::cmpResult;
bool IsUnordered = (CompareResult == llvm::APFloatBase::cmpUnordered);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 469b688b66627..bd489703be924 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14626,8 +14626,8 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
const bool Matches = MatchesPredicate(Predicate, CompareResult);
// Create bit patterns for comparison results:
- // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
- // False = all bits zero
+ // True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for
+ // double) False = all bits zero
const llvm::fltSemantics &Sem = A0.getSemantics();
const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
@@ -17793,7 +17793,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
}
return Success(APValue(RetMask), E);
}
-
}
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 2f0d28d0c495d..3104a8fe55df5 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -437,8 +437,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
}
@@ -458,8 +458,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
}
@@ -479,8 +479,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmple_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
}
@@ -500,8 +500,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
}
@@ -521,8 +521,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpge_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
}
@@ -543,8 +543,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_pd(__m128d __a,
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpord_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
}
@@ -566,8 +566,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_pd(__m128d __a
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpunord_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
}
@@ -587,8 +587,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_pd(__m128d _
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpneq_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
}
@@ -608,8 +608,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_pd(__m128d __a
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
}
@@ -629,8 +629,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_pd(__m128d __a
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnle_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
}
@@ -650,8 +650,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_pd(__m128d __a
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpngt_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
}
@@ -671,8 +671,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_pd(__m128d __a
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns A 128-bit vector containing the comparison results.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_pd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnge_pd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
}
@@ -694,8 +694,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_pd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpeq_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
}
@@ -719,8 +719,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmplt_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
}
@@ -744,8 +744,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmple_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
}
@@ -769,8 +769,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpgt_sd(__m128d __a, __m128d __b) {
__m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
@@ -795,8 +795,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpge_sd(__m128d __a, __m128d __b) {
__m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
@@ -822,8 +822,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_sd(__m128d __a,
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpord_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
}
@@ -849,8 +849,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpord_sd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpunord_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
}
@@ -874,8 +874,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpunord_sd(__m128d _
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpneq_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
}
@@ -899,8 +899,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpneq_sd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
}
@@ -924,8 +924,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnlt_sd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnle_sd(__m128d __a, __m128d __b) {
return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
}
@@ -949,8 +949,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnle_sd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpngt_sd(__m128d __a, __m128d __b) {
__m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
@@ -975,8 +975,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpngt_sd(__m128d __a
/// compared to the lower double-precision value of \a __a.
/// \returns A 128-bit vector. The lower 64 bits contains the comparison
/// results. The upper 64 bits are copied from the upper 64 bits of \a __a.
-static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpnge_sd(__m128d __a,
- __m128d __b) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_cmpnge_sd(__m128d __a, __m128d __b) {
__m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
return __extension__(__m128d){__c[0], __a[1]};
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index b0294b8f45619..48a96aff441d4 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -500,9 +500,8 @@ _mm_xor_ps(__m128 __a, __m128 __b) {
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpeq_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_ss(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
}
@@ -521,9 +520,8 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpeq_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpeq_ps(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
}
@@ -547,9 +545,8 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmplt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_ss(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
}
@@ -569,9 +566,8 @@ _mm_cmplt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmplt_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmplt_ps(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
}
@@ -595,9 +591,8 @@ _mm_cmplt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmple_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_ss(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
}
@@ -617,9 +612,8 @@ _mm_cmple_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmple_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmple_ps(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
}
@@ -643,9 +637,8 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpgt_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_ss(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_shufflevector((__v4sf)__a,
(__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
@@ -667,9 +660,8 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpgt_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpgt_ps(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
}
@@ -693,9 +685,8 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
/// 32 bits of this operand are used in the comparison.
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpge_ss(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_ss(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_shufflevector((__v4sf)__a,
(__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
@@ -717,9 +708,8 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
-static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpge_ps(__m128 __a, __m128 __b)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cmpge_ps(__m128 __a,
+ __m128 __b) {
return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
}
@@ -744,8 +734,7 @@ _mm_cmpge_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpneq_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpneq_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
}
@@ -766,8 +755,7 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpneq_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpneq_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
}
@@ -793,8 +781,7 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnlt_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
}
@@ -816,8 +803,7 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnlt_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
}
@@ -843,8 +829,7 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnle_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpnle_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
}
@@ -866,8 +851,7 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnle_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpnle_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
}
@@ -893,8 +877,7 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpngt_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpngt_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_shufflevector((__v4sf)__a,
(__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
@@ -918,8 +901,7 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpngt_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpngt_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
}
@@ -945,8 +927,7 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnge_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpnge_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_shufflevector((__v4sf)__a,
(__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
@@ -970,8 +951,7 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpnge_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpnge_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
}
@@ -997,8 +977,7 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpord_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpord_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
}
@@ -1021,8 +1000,7 @@ _mm_cmpord_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpord_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpord_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
}
@@ -1048,8 +1026,7 @@ _mm_cmpord_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the comparison results
/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpunord_ss(__m128 __a, __m128 __b)
-{
+_mm_cmpunord_ss(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
}
@@ -1072,8 +1049,7 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float].
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_cmpunord_ps(__m128 __a, __m128 __b)
-{
+_mm_cmpunord_ps(__m128 __a, __m128 __b) {
return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
}
>From 9497e455c031ad25080afa5405157282d87dc9ca Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 03:51:12 +0700
Subject: [PATCH 11/14] Fix extra-semicolon error
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +-
clang/lib/AST/ExprConstShared.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 7073125ca38ac..8f3cf28a8eeaa 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4341,7 +4341,7 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
Dst.initializeAllElements();
return true;
-};
+}
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index a160f31e2d06c..4505309e7cf0b 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -142,7 +142,7 @@ static bool MatchesPredicate(const uint32_t Imm,
return true;
}
return false;
-};
+}
} // namespace clang
using namespace clang;
>From 72e86bf989de31ee0fd5cc53e36bc1f246cfdf63 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 04:09:03 +0700
Subject: [PATCH 12/14] [Clang] Make MatchesPredicate non-static
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ExprConstShared.h | 62 +--------------------------------
clang/lib/AST/ExprConstant.cpp | 62 +++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+), 61 deletions(-)
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index 4505309e7cf0b..012723dbf297a 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -82,67 +82,7 @@ constexpr uint32_t CMP_TRUE_US = 0x1F; // True (unordered, signaling)
} // namespace X86CmpImm
// Return true if immediate and the comparison flags are matching
-static bool MatchesPredicate(const uint32_t Imm,
- const llvm::APFloatBase::cmpResult CompareResult) {
- using CmpResult = llvm::APFloatBase::cmpResult;
-
- bool IsUnordered = (CompareResult == llvm::APFloatBase::cmpUnordered);
- bool IsEq = (CompareResult == CmpResult::cmpEqual);
- bool IsGt = (CompareResult == CmpResult::cmpGreaterThan);
- bool IsLt = (CompareResult == CmpResult::cmpLessThan);
-
- switch (Imm & 0x1F) {
- case X86CmpImm::CMP_EQ_OQ:
- case X86CmpImm::CMP_EQ_OS:
- return IsEq && !IsUnordered;
- case X86CmpImm::CMP_LT_OS:
- case X86CmpImm::CMP_LT_OQ:
- return IsLt && !IsUnordered;
- case X86CmpImm::CMP_LE_OS:
- case X86CmpImm::CMP_LE_OQ:
- return !IsGt && !IsUnordered;
- case X86CmpImm::CMP_UNORD_Q:
- case X86CmpImm::CMP_UNORD_S:
- return IsUnordered;
- case X86CmpImm::CMP_NEQ_UQ:
- case X86CmpImm::CMP_NEQ_US:
- return !IsEq || IsUnordered;
- case X86CmpImm::CMP_NLT_US:
- case X86CmpImm::CMP_NLT_UQ:
- return !IsLt || IsUnordered;
- case X86CmpImm::CMP_NLE_US:
- case X86CmpImm::CMP_NLE_UQ:
- return IsGt || IsUnordered;
- case X86CmpImm::CMP_ORD_Q:
- case X86CmpImm::CMP_ORD_S:
- return !IsUnordered;
- case X86CmpImm::CMP_EQ_UQ:
- case X86CmpImm::CMP_EQ_US:
- return IsEq || IsUnordered;
- case X86CmpImm::CMP_NGE_US:
- case X86CmpImm::CMP_NGE_UQ:
- return IsLt || IsUnordered;
- case X86CmpImm::CMP_NGT_US:
- case X86CmpImm::CMP_NGT_UQ:
- return !IsGt || IsUnordered;
- case X86CmpImm::CMP_FALSE_OQ:
- case X86CmpImm::CMP_FALSE_OS:
- return false;
- case X86CmpImm::CMP_NEQ_OQ:
- case X86CmpImm::CMP_NEQ_OS:
- return !IsEq && !IsUnordered;
- case X86CmpImm::CMP_GE_OS:
- case X86CmpImm::CMP_GE_OQ:
- return !IsLt && !IsUnordered;
- case X86CmpImm::CMP_GT_OS:
- case X86CmpImm::CMP_GT_OQ:
- return IsGt && !IsUnordered;
- case X86CmpImm::CMP_TRUE_UQ:
- case X86CmpImm::CMP_TRUE_US:
- return true;
- }
- return false;
-}
+bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult);
} // namespace clang
using namespace clang;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index bd489703be924..1f1fb8092fc21 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12141,6 +12141,68 @@ static bool evalShiftWithCount(
return true;
}
+bool clang::MatchesPredicate(
+ const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult) {
+ using CmpResult = llvm::APFloatBase::cmpResult;
+
+ bool IsUnordered = (CompareResult == llvm::APFloatBase::cmpUnordered);
+ bool IsEq = (CompareResult == CmpResult::cmpEqual);
+ bool IsGt = (CompareResult == CmpResult::cmpGreaterThan);
+ bool IsLt = (CompareResult == CmpResult::cmpLessThan);
+
+ switch (Imm & 0x1F) {
+ case X86CmpImm::CMP_EQ_OQ:
+ case X86CmpImm::CMP_EQ_OS:
+ return IsEq && !IsUnordered;
+ case X86CmpImm::CMP_LT_OS:
+ case X86CmpImm::CMP_LT_OQ:
+ return IsLt && !IsUnordered;
+ case X86CmpImm::CMP_LE_OS:
+ case X86CmpImm::CMP_LE_OQ:
+ return !IsGt && !IsUnordered;
+ case X86CmpImm::CMP_UNORD_Q:
+ case X86CmpImm::CMP_UNORD_S:
+ return IsUnordered;
+ case X86CmpImm::CMP_NEQ_UQ:
+ case X86CmpImm::CMP_NEQ_US:
+ return !IsEq || IsUnordered;
+ case X86CmpImm::CMP_NLT_US:
+ case X86CmpImm::CMP_NLT_UQ:
+ return !IsLt || IsUnordered;
+ case X86CmpImm::CMP_NLE_US:
+ case X86CmpImm::CMP_NLE_UQ:
+ return IsGt || IsUnordered;
+ case X86CmpImm::CMP_ORD_Q:
+ case X86CmpImm::CMP_ORD_S:
+ return !IsUnordered;
+ case X86CmpImm::CMP_EQ_UQ:
+ case X86CmpImm::CMP_EQ_US:
+ return IsEq || IsUnordered;
+ case X86CmpImm::CMP_NGE_US:
+ case X86CmpImm::CMP_NGE_UQ:
+ return IsLt || IsUnordered;
+ case X86CmpImm::CMP_NGT_US:
+ case X86CmpImm::CMP_NGT_UQ:
+ return !IsGt || IsUnordered;
+ case X86CmpImm::CMP_FALSE_OQ:
+ case X86CmpImm::CMP_FALSE_OS:
+ return false;
+ case X86CmpImm::CMP_NEQ_OQ:
+ case X86CmpImm::CMP_NEQ_OS:
+ return !IsEq && !IsUnordered;
+ case X86CmpImm::CMP_GE_OS:
+ case X86CmpImm::CMP_GE_OQ:
+ return !IsLt && !IsUnordered;
+ case X86CmpImm::CMP_GT_OS:
+ case X86CmpImm::CMP_GT_OQ:
+ return IsGt && !IsUnordered;
+ case X86CmpImm::CMP_TRUE_UQ:
+ case X86CmpImm::CMP_TRUE_US:
+ return true;
+ }
+ return false;
+}
+
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
>From 7c96fa74ccc33a31730a4d832da4dfdbd499aa1e Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sat, 21 Feb 2026 04:12:43 +0700
Subject: [PATCH 13/14] [Clang][NFC] clang-format
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ExprConstShared.h | 3 ++-
clang/lib/AST/ExprConstant.cpp | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/ExprConstShared.h b/clang/lib/AST/ExprConstShared.h
index 012723dbf297a..2508dbe3d67ae 100644
--- a/clang/lib/AST/ExprConstShared.h
+++ b/clang/lib/AST/ExprConstShared.h
@@ -82,7 +82,8 @@ constexpr uint32_t CMP_TRUE_US = 0x1F; // True (unordered, signaling)
} // namespace X86CmpImm
// Return true if immediate and the comparison flags are matching
-bool MatchesPredicate(const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult);
+bool MatchesPredicate(const uint32_t Imm,
+ const llvm::APFloatBase::cmpResult CompareResult);
} // namespace clang
using namespace clang;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1f1fb8092fc21..5badc97dffad4 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12141,8 +12141,8 @@ static bool evalShiftWithCount(
return true;
}
-bool clang::MatchesPredicate(
- const uint32_t Imm, const llvm::APFloatBase::cmpResult CompareResult) {
+bool clang::MatchesPredicate(const uint32_t Imm,
+ const llvm::APFloatBase::cmpResult CompareResult) {
using CmpResult = llvm::APFloatBase::cmpResult;
bool IsUnordered = (CompareResult == llvm::APFloatBase::cmpUnordered);
>From ce212c259de3d40d06bc61ae6f77b46bca922964 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <zaky.hermawan9615 at gmail.com>
Date: Sun, 22 Feb 2026 03:16:28 +0700
Subject: [PATCH 14/14] [Clang][X86] Remove const for trivial types and declare
variable on-demand
Signed-off-by: ZakyHermawan <zaky.hermawan9615 at gmail.com>
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 96 ++++++++++++------------
1 file changed, 48 insertions(+), 48 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 8f3cf28a8eeaa..c3964515701cb 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4162,8 +4162,7 @@ static bool interp__builtin_x86_comi(InterpState &S, CodePtr OpPC,
break;
case X86::BI__builtin_ia32_vcomish: {
discard(S.Stk, *S.getContext().classify(Call->getArg(3)));
- const APSInt Imm = popToAPSInt(S, Call->getArg(2));
- Predicate = Imm.getZExtValue();
+ Predicate = popToUInt64(S, Call->getArg(2));
break;
}
default:
@@ -4177,9 +4176,9 @@ static bool interp__builtin_x86_comi(InterpState &S, CodePtr OpPC,
VectorA.getNumElems() != VectorB.getNumElems())
return false;
- const llvm::APFloat A = VectorA.elem<Floating>(0).getAPFloat();
- const llvm::APFloat B = VectorB.elem<Floating>(0).getAPFloat();
- const bool Matches = MatchesPredicate(Predicate, A.compare(B));
+ llvm::APFloat A = VectorA.elem<Floating>(0).getAPFloat();
+ llvm::APFloat B = VectorB.elem<Floating>(0).getAPFloat();
+ bool Matches = MatchesPredicate(Predicate, A.compare(B));
pushInteger(S, Matches, Call->getType());
return true;
}
@@ -4194,9 +4193,11 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
ID == X86::BI__builtin_ia32_cmpss || ID == X86::BI__builtin_ia32_cmpsd;
uint32_t Predicate;
+ bool IsScalar = false;
if (HasImmArg) {
- const APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
- Predicate = ImmAPS.getZExtValue();
+ Predicate = popToUInt64(S, Call->getArg(2));
+ IsScalar = ID == X86::BI__builtin_ia32_cmpss ||
+ ID == X86::BI__builtin_ia32_cmpsd;
} else {
switch (ID) {
case X86::BI__builtin_ia32_cmpeqss:
@@ -4204,72 +4205,96 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
Predicate = X86CmpImm::CMP_EQ_OQ;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpeqss ||
+ ID == X86::BI__builtin_ia32_cmpeqsd;
break;
case X86::BI__builtin_ia32_cmpgess:
case X86::BI__builtin_ia32_cmpgesd:
case X86::BI__builtin_ia32_cmpgeps:
case X86::BI__builtin_ia32_cmpgepd:
Predicate = X86CmpImm::CMP_GE_OS;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpgess ||
+ ID == X86::BI__builtin_ia32_cmpgesd;
break;
case X86::BI__builtin_ia32_cmpgtss:
case X86::BI__builtin_ia32_cmpgtsd:
case X86::BI__builtin_ia32_cmpgtps:
case X86::BI__builtin_ia32_cmpgtpd:
Predicate = X86CmpImm::CMP_GT_OS;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpgtss ||
+ ID == X86::BI__builtin_ia32_cmpgtsd;
break;
case X86::BI__builtin_ia32_cmpltss:
case X86::BI__builtin_ia32_cmpltsd:
case X86::BI__builtin_ia32_cmpltps:
case X86::BI__builtin_ia32_cmpltpd:
Predicate = X86CmpImm::CMP_LT_OS;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpltss ||
+ ID == X86::BI__builtin_ia32_cmpltsd;
break;
case X86::BI__builtin_ia32_cmpless:
case X86::BI__builtin_ia32_cmplesd:
case X86::BI__builtin_ia32_cmpleps:
case X86::BI__builtin_ia32_cmplepd:
Predicate = X86CmpImm::CMP_LE_OS;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpless ||
+ ID == X86::BI__builtin_ia32_cmplesd;
break;
case X86::BI__builtin_ia32_cmpneqss:
case X86::BI__builtin_ia32_cmpneqsd:
case X86::BI__builtin_ia32_cmpneqps:
case X86::BI__builtin_ia32_cmpneqpd:
Predicate = X86CmpImm::CMP_NEQ_UQ;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpneqss ||
+ ID == X86::BI__builtin_ia32_cmpneqsd;
break;
case X86::BI__builtin_ia32_cmpngess:
case X86::BI__builtin_ia32_cmpngesd:
case X86::BI__builtin_ia32_cmpngeps:
case X86::BI__builtin_ia32_cmpngepd:
Predicate = X86CmpImm::CMP_NGE_US;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpngess ||
+ ID == X86::BI__builtin_ia32_cmpngesd;
break;
case X86::BI__builtin_ia32_cmpngtss:
case X86::BI__builtin_ia32_cmpngtsd:
case X86::BI__builtin_ia32_cmpngtps:
case X86::BI__builtin_ia32_cmpngtpd:
Predicate = X86CmpImm::CMP_NGT_US;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpngtss ||
+ ID == X86::BI__builtin_ia32_cmpngtsd;
break;
case X86::BI__builtin_ia32_cmpnless:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpnleps:
case X86::BI__builtin_ia32_cmpnlepd:
Predicate = X86CmpImm::CMP_NLE_US;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpnless ||
+ ID == X86::BI__builtin_ia32_cmpnlesd;
break;
case X86::BI__builtin_ia32_cmpnltss:
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnltps:
case X86::BI__builtin_ia32_cmpnltpd:
Predicate = X86CmpImm::CMP_NLT_US;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpnltss ||
+ ID == X86::BI__builtin_ia32_cmpnltsd;
break;
case X86::BI__builtin_ia32_cmpordss:
case X86::BI__builtin_ia32_cmpordsd:
case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordpd:
Predicate = X86CmpImm::CMP_ORD_Q;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpordss ||
+ ID == X86::BI__builtin_ia32_cmpordsd;
break;
case X86::BI__builtin_ia32_cmpunordss:
case X86::BI__builtin_ia32_cmpunordsd:
case X86::BI__builtin_ia32_cmpunordps:
case X86::BI__builtin_ia32_cmpunordpd:
Predicate = X86CmpImm::CMP_UNORD_Q;
+ IsScalar = ID == X86::BI__builtin_ia32_cmpunordss ||
+ ID == X86::BI__builtin_ia32_cmpunordsd;
break;
default:
llvm_unreachable("unhandled x86 cmp builtin");
@@ -4280,63 +4305,38 @@ static bool interp__builtin_x86_cmp(InterpState &S, CodePtr OpPC,
const Pointer &VectorA = S.Stk.pop<Pointer>();
Pointer &Dst = S.Stk.peek<Pointer>();
- const bool IsScalar = ID == X86::BI__builtin_ia32_cmpss ||
- ID == X86::BI__builtin_ia32_cmpsd ||
- ID == X86::BI__builtin_ia32_cmpeqss ||
- ID == X86::BI__builtin_ia32_cmpeqsd ||
- ID == X86::BI__builtin_ia32_cmpgess ||
- ID == X86::BI__builtin_ia32_cmpgesd ||
- ID == X86::BI__builtin_ia32_cmpgtss ||
- ID == X86::BI__builtin_ia32_cmpgtsd ||
- ID == X86::BI__builtin_ia32_cmpltss ||
- ID == X86::BI__builtin_ia32_cmpltsd ||
- ID == X86::BI__builtin_ia32_cmpless ||
- ID == X86::BI__builtin_ia32_cmplesd ||
- ID == X86::BI__builtin_ia32_cmpneqss ||
- ID == X86::BI__builtin_ia32_cmpneqsd ||
- ID == X86::BI__builtin_ia32_cmpngess ||
- ID == X86::BI__builtin_ia32_cmpngesd ||
- ID == X86::BI__builtin_ia32_cmpngtss ||
- ID == X86::BI__builtin_ia32_cmpngtsd ||
- ID == X86::BI__builtin_ia32_cmpnless ||
- ID == X86::BI__builtin_ia32_cmpnlesd ||
- ID == X86::BI__builtin_ia32_cmpnltss ||
- ID == X86::BI__builtin_ia32_cmpnltsd ||
- ID == X86::BI__builtin_ia32_cmpordss ||
- ID == X86::BI__builtin_ia32_cmpordsd ||
- ID == X86::BI__builtin_ia32_cmpunordss ||
- ID == X86::BI__builtin_ia32_cmpunordsd;
-
- const auto NumLanes = VectorA.getNumElems();
+ unsigned NumLanes = VectorA.getNumElems();
if (NumLanes != VectorB.getNumElems())
return false;
- for (unsigned int i = 0; i < NumLanes; ++i) {
+ for (unsigned I = 0; I != NumLanes; ++I) {
// Handle cmpss/cmpsd
- if (IsScalar && i > 0) {
+ if (IsScalar && I > 0) {
// Copy the upper 3 packed elements from a to the upper elements of dst
- Dst.elem<Floating>(i) = VectorA.elem<Floating>(i);
+ Dst.elem<Floating>(I) = VectorA.elem<Floating>(I);
continue;
}
- llvm::APFloat AElement = VectorA.elem<Floating>(i).getAPFloat();
- llvm::APFloat BElement = VectorB.elem<Floating>(i).getAPFloat();
+ llvm::APFloat AElement = VectorA.elem<Floating>(I).getAPFloat();
+ llvm::APFloat BElement = VectorB.elem<Floating>(I).getAPFloat();
auto CompareResult = AElement.compare(BElement);
- const bool Matches = MatchesPredicate(Predicate, CompareResult);
+ bool Matches = MatchesPredicate(Predicate, CompareResult);
// Create bit patterns for comparison results:
// True = all bits set (0xFFFFFFFF for float, 0xFFFFFFFFFFFFFFFF for double)
// False = all bits zero
const llvm::fltSemantics &Sem = AElement.getSemantics();
- const unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
- const llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
- const llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
+ unsigned BitWidth = llvm::APFloat::getSizeInBits(Sem);
- if (Matches)
- Dst.elem<Floating>(i) = Floating(True);
- else
- Dst.elem<Floating>(i) = Floating(False);
+ if (Matches) {
+ llvm::APFloat True(Sem, llvm::APInt::getAllOnes(BitWidth));
+ Dst.elem<Floating>(I) = Floating(True);
+ }
+ else {
+ llvm::APFloat False(Sem, llvm::APInt(BitWidth, 0));
+ Dst.elem<Floating>(I) = Floating(False);
+ }
}
Dst.initializeAllElements();
More information about the cfe-commits
mailing list