[llvm] SelectionDAG: Support nofpclass (PR #108350)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 21 23:27:11 PDT 2024
https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/108350
>From 4566628a98447c863f90965aa5ff360d54a34c9b Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Thu, 12 Sep 2024 17:00:13 +0800
Subject: [PATCH 1/2] SelectionDAG: Support nofpclass
Currently SelectionDAG ignores the nofpclass information on arguments.
For example:
define dso_local float @f(float noundef nofpclass(nan zero) %a, float noundef nofpclass(nan zero) %b) #0 {
entry:
%cond = tail call float @llvm.maximumnum.f32(float %a, float %b)
ret float %cond
}
For these arguments, SelectionDAG::isKnownNeverNaN returns false.
TODO:
1) bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth)
needs to process hasNoSNaN;
2) bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op)
needs to process Zero and SignedZero.
These two problems will be fixed in follow-up PRs.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 8 +++
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 33 +++++++++++--
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 49 +++++++++++++++++--
.../SelectionDAG/SelectionDAGBuilder.cpp | 37 +++++++++++++-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++
llvm/test/CodeGen/AMDGPU/known-never-snan.ll | 1 -
llvm/test/CodeGen/AMDGPU/reduction.ll | 28 +++--------
llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 41 +++++-----------
8 files changed, 142 insertions(+), 59 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index d6c2c36a0d482a..1ee3e9382a2466 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2136,6 +2136,14 @@ class SelectionDAG {
/// positive or negative zero.
bool isKnownNeverZeroFloat(SDValue Op) const;
+ /// Test whether the given floating point SDValue is known to never be
+ /// positive zero.
+ bool isKnownNeverPosZeroFloat(SDValue Op) const;
+
+ /// Test whether the given floating point SDValue is known to never be
+ /// negative zero.
+ bool isKnownNeverNegZeroFloat(SDValue Op) const;
+
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 6067b3b29ea181..737927aba67edf 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -383,6 +383,7 @@ struct SDNodeFlags {
bool Exact : 1;
bool Disjoint : 1;
bool NonNeg : 1;
+ // deprecated: Use NoQNaNs && NoSNaNs
bool NoNaNs : 1;
bool NoInfs : 1;
bool NoSignedZeros : 1;
@@ -400,6 +401,11 @@ struct SDNodeFlags {
// Instructions with attached 'unpredictable' metadata on IR level.
bool Unpredictable : 1;
+ bool NoQNaNs : 1;
+ bool NoSNaNs : 1;
+ bool NoPosZeros : 1;
+ bool NoNegZeros : 1;
+
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
@@ -407,12 +413,15 @@ struct SDNodeFlags {
Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), AllowContract(false),
ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false),
- Unpredictable(false) {}
+ Unpredictable(false), NoQNaNs(false), NoSNaNs(false), NoPosZeros(false),
+ NoNegZeros(false) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
- setNoNaNs(FPMO.hasNoNaNs());
+ setNoSNaNs(FPMO.hasNoNaNs());
+ setNoQNaNs(FPMO.hasNoNaNs());
setNoInfs(FPMO.hasNoInfs());
+ setNoNegZeros(FPMO.hasNoSignedZeros());
setNoSignedZeros(FPMO.hasNoSignedZeros());
setAllowReciprocal(FPMO.hasAllowReciprocal());
setAllowContract(FPMO.hasAllowContract());
@@ -426,8 +435,20 @@ struct SDNodeFlags {
void setExact(bool b) { Exact = b; }
void setDisjoint(bool b) { Disjoint = b; }
void setNonNeg(bool b) { NonNeg = b; }
- void setNoNaNs(bool b) { NoNaNs = b; }
+ [[deprecated("Use SetSNaNs() and SetQNaNs()")]] void setNoNaNs(bool b) {
+ NoNaNs = NoQNaNs = NoSNaNs = b;
+ }
+ void setNoQNaNs(bool b) {
+ NoQNaNs = b;
+ NoNaNs = (NoQNaNs && NoSNaNs);
+ }
+ void setNoSNaNs(bool b) {
+ NoSNaNs = b;
+ NoNaNs = (NoQNaNs && NoSNaNs);
+ }
void setNoInfs(bool b) { NoInfs = b; }
+ void setNoPosZeros(bool b) { NoPosZeros = b; }
+ void setNoNegZeros(bool b) { NoNegZeros = b; }
void setNoSignedZeros(bool b) { NoSignedZeros = b; }
void setAllowReciprocal(bool b) { AllowReciprocal = b; }
void setAllowContract(bool b) { AllowContract = b; }
@@ -442,8 +463,12 @@ struct SDNodeFlags {
bool hasExact() const { return Exact; }
bool hasDisjoint() const { return Disjoint; }
bool hasNonNeg() const { return NonNeg; }
- bool hasNoNaNs() const { return NoNaNs; }
+ bool hasNoNaNs() const { return (NoSNaNs && NoQNaNs); }
+ bool hasNoSNaNs() const { return NoSNaNs; }
+ bool hasNoQNaNs() const { return NoQNaNs; }
bool hasNoInfs() const { return NoInfs; }
+ bool hasNoPosZeros() const { return NoPosZeros; }
+ bool hasNoNegZeros() const { return NoNegZeros; }
bool hasNoSignedZeros() const { return NoSignedZeros; }
bool hasAllowReciprocal() const { return AllowReciprocal; }
bool hasAllowContract() const { return AllowContract; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9b96dbb666198a..9d61567a283649 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5435,7 +5435,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
// If we're told that NaNs won't happen, assume they won't.
- if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (SNaN && OpFlags.hasNoSNaNs())
+ return true;
+ if (OpFlags.hasNoSNaNs() && OpFlags.hasNoQNaNs())
return true;
if (Depth >= MaxRecursionDepth)
@@ -5569,11 +5574,39 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
assert(Op.getValueType().isFloatingPoint() &&
"Floating point type expected");
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (OpFlags.hasNoPosZeros() && OpFlags.hasNoNegZeros())
+ return true;
+
// If the value is a constant, we can obviously see if it is a zero or not.
return ISD::matchUnaryFpPredicate(
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
}
+bool SelectionDAG::isKnownNeverPosZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");
+
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (OpFlags.hasNoPosZeros())
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ return ISD::matchUnaryFpPredicate(
+ Op, [](ConstantFPSDNode *C) { return !C->isZero() || C->isNegative(); });
+}
+
+bool SelectionDAG::isKnownNeverNegZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");
+
+ SDNodeFlags OpFlags = Op->getFlags();
+ if (OpFlags.hasNoNegZeros())
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ return ISD::matchUnaryFpPredicate(
+ Op, [](ConstantFPSDNode *C) { return !C->isZero() || !C->isNegative(); });
+}
+
bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
@@ -7490,6 +7523,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N2.getOpcode() != ISD::DELETED_NODE &&
N3.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
+ SDNodeFlags NewFlags = Flags;
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA:
@@ -7535,6 +7569,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert((!VT.isVector() || VT.getVectorElementCount() ==
N1.getValueType().getVectorElementCount()) &&
"SETCC vector element counts must match!");
+ if (N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) {
+ NewFlags.setNoQNaNs(true);
+ NewFlags.setNoSNaNs(true);
+ }
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
@@ -7548,6 +7586,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::SELECT:
case ISD::VSELECT:
+ if ((N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) ||
+ N3->getFlags().hasNoNaNs()) {
+ NewFlags.setNoQNaNs(true);
+ NewFlags.setNoSNaNs(true);
+ }
if (SDValue V = simplifySelect(N1, N2, N3))
return V;
break;
@@ -7654,12 +7697,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
- E->intersectFlagsWith(Flags);
+ E->intersectFlagsWith(NewFlags);
return SDValue(E, 0);
}
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
- N->setFlags(Flags);
+ N->setFlags(NewFlags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 25213f587116d5..0bfb0c14dd9020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3708,8 +3708,24 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
bool Negate = false;
SDNodeFlags Flags;
- if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ SelectInst *NewI = dyn_cast<SelectInst>(cast<SelectInst>(I).clone());
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
Flags.copyFMF(*FPOp);
+ if (Cond->getFlags().hasNoNaNs() ||
+ (LHSVal->getFlags().hasNoNaNs() && RHSVal->getFlags().hasNoNaNs())) {
+ FastMathFlags FMF = FPOp->getFastMathFlags();
+ FMF.setNoNaNs(true);
+ NewI->setFastMathFlags(FMF);
+ CmpInst *CmpCond = dyn_cast<CmpInst>(NewI->getCondition());
+ if (isa<FPMathOperator>(CmpCond)) {
+ FastMathFlags CondFMF = CmpCond->getFastMathFlags();
+ CondFMF.setNoNaNs(true);
+ CmpCond->setFastMathFlags(CondFMF);
+ }
+ Flags.setNoQNaNs(true);
+ Flags.setNoSNaNs(true);
+ }
+ }
Flags.setUnpredictable(
cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
@@ -3735,7 +3751,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
// -0.0 is less than +0.0.
const Value *LHS, *RHS;
- auto SPR = matchSelectPattern(&I, LHS, RHS);
+ auto SPR = matchSelectPattern(NewI, LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
@@ -3798,6 +3814,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
BaseOps.clear();
}
}
+ NewI->deleteValue();
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
@@ -11775,6 +11792,22 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
+ if (Arg.hasAttribute(Attribute::NoFPClass)) {
+ SDNodeFlags InValFlags = InVals[i]->getFlags();
+ bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan);
+ bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan);
+ InValFlags.setNoSNaNs(NoSNaN);
+ InValFlags.setNoQNaNs(NoQNaN);
+ bool NoPosZeros =
+ ((Arg.getNoFPClass() & llvm::fcPosZero) == llvm::fcPosZero);
+ bool NoNegZeros =
+ ((Arg.getNoFPClass() & llvm::fcNegZero) == llvm::fcNegZero);
+ InValFlags.setNoPosZeros(NoPosZeros);
+ InValFlags.setNoNegZeros(NoNegZeros);
+ InValFlags.setNoInfs((Arg.getNoFPClass() & llvm::fcInf) ==
+ llvm::fcInf);
+ InVals[i]->setFlags(InValFlags);
+ }
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr, NewRoot,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2a232ed93b72f..617946e4bc2a7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8422,6 +8422,10 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
"Wrong opcode");
+ EVT VT = Node->getValueType(0);
+ if (VT.isVector() && isOperationLegal(Opcode, VT.getScalarType()))
+ return SDValue();
+
if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
SDValue Op1 = Node->getOperand(0);
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
index 64948c374e4ddc..10cab8f4a6529d 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -248,7 +248,6 @@ define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
%b.nnan.add = fadd nnan float %b, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 53a036b6177257..1663330bd1b9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -498,18 +498,11 @@ entry:
; XVI-NEXT: s_setpc_b64
; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
+; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], v0, v1{{$}}
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
+; VI-DAG: v_max_f16_sdwa [[MAX0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], v0, v1
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
entry:
@@ -537,19 +530,12 @@ entry:
; XVI-NEXT: s_setpc_b64
; GFX9: s_waitcnt
-; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
-; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
-; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
+; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], v0, v1{{$}}
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
-; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
-
-; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
-; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
-; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
+; VI-DAG: v_min_f16_sdwa [[MIN0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_min_f16_e32 [[MIN1:v[0-9]+]], v0, v1
+; VI: v_min_f16_e32 v0, [[MIN1]], [[MIN0]]
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 41d9a867c0a960..fde92a0605e5d5 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -411,23 +411,16 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; SSE2-NEXT: divss %xmm0, %xmm1
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: js .LBB9_2
-; SSE2-NEXT: # %bb.1:
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: .LBB9_2:
-; SSE2-NEXT: movaps %xmm3, %xmm2
-; SSE2-NEXT: cmpunordss %xmm3, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm4
-; SSE2-NEXT: andps %xmm3, %xmm4
-; SSE2-NEXT: js .LBB9_4
-; SSE2-NEXT: # %bb.3:
+; SSE2-NEXT: js .LBB9_1
+; SSE2-NEXT: # %bb.2:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: jmp .LBB9_3
+; SSE2-NEXT: .LBB9_1:
+; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: .LBB9_4:
-; SSE2-NEXT: maxss %xmm1, %xmm3
-; SSE2-NEXT: andnps %xmm3, %xmm2
-; SSE2-NEXT: orps %xmm4, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: .LBB9_3:
+; SSE2-NEXT: maxss %xmm2, %xmm1
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX1-LABEL: test_fmaximum_combine_cmps:
@@ -437,15 +430,11 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: js .LBB9_1
; AVX1-NEXT: # %bb.2:
-; AVX1-NEXT: vmovaps %xmm0, %xmm2
-; AVX1-NEXT: jmp .LBB9_3
+; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
; AVX1-NEXT: .LBB9_1:
; AVX1-NEXT: vmovaps %xmm1, %xmm2
-; AVX1-NEXT: vmovaps %xmm0, %xmm1
-; AVX1-NEXT: .LBB9_3:
-; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0
-; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
-; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmaxss %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512F-LABEL: test_fmaximum_combine_cmps:
@@ -459,8 +448,6 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512F-NEXT: vmaxss %xmm2, %xmm1, %xmm0
-; AVX512F-NEXT: vcmpunordss %xmm1, %xmm1, %k1
-; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_fmaximum_combine_cmps:
@@ -490,9 +477,7 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; X86-NEXT: vmovaps %xmm0, %xmm2
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-NEXT: .LBB9_3:
-; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1
-; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
-; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
>From 41e074082e00ed93dc7ac7855672e4554b9e03ed Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Sun, 22 Sep 2024 14:26:41 +0800
Subject: [PATCH 2/2] Add fcmp+select testcase
---
llvm/test/CodeGen/X86/fcmp-nofpclass.ll | 118 ++++++++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/fcmp-nofpclass.ll
diff --git a/llvm/test/CodeGen/X86/fcmp-nofpclass.ll b/llvm/test/CodeGen/X86/fcmp-nofpclass.ll
new file mode 100644
index 00000000000000..e30b74197f781d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fcmp-nofpclass.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
+
+define dso_local noundef float @maxs(float noundef nofpclass(nan) %a, float noundef nofpclass(nan) %b) local_unnamed_addr #0 {
+; SSE2-LABEL: maxs:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: maxss %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: maxs:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: maxs:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+entry:
+ %cmp = fcmp ogt float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define dso_local noundef <8 x float> @maxs_v8f32(<8 x float> noundef nofpclass(nan) %a, <8 x float> noundef nofpclass(nan) %b) local_unnamed_addr #0 {
+; SSE2-LABEL: maxs_v8f32:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: maxps %xmm2, %xmm0
+; SSE2-NEXT: maxps %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: maxs_v8f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: maxs_v8f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; X86-NEXT: retl
+entry:
+ %cmp = fcmp ogt <8 x float> %a, %b
+ %cond = select <8 x i1> %cmp, <8 x float> %a, <8 x float> %b
+ ret <8 x float> %cond
+}
+
+define dso_local noundef float @maxd(float noundef nofpclass(nan) %a, float noundef nofpclass(nan) %b) local_unnamed_addr #0 {
+; SSE2-LABEL: maxd:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: maxss %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: maxd:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X86-LABEL: maxd:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: vmaxss {{[0-9]+}}(%esp), %xmm0, %xmm0
+; X86-NEXT: vmovss %xmm0, (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+entry:
+ %cmp = fcmp ogt float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define dso_local noundef <8 x double> @mind_v8f32(<8 x double> noundef nofpclass(nan) %a, <8 x double> noundef nofpclass(nan) %b) local_unnamed_addr #0 {
+; SSE2-LABEL: mind_v8f32:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: maxpd %xmm4, %xmm0
+; SSE2-NEXT: maxpd %xmm5, %xmm1
+; SSE2-NEXT: maxpd %xmm6, %xmm2
+; SSE2-NEXT: maxpd %xmm7, %xmm3
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: mind_v8f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %ymm3, %ymm1, %ymm1
+; AVX-NEXT: retq
+;
+; X86-LABEL: mind_v8f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-32, %esp
+; X86-NEXT: subl $32, %esp
+; X86-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
+; X86-NEXT: vmaxpd 8(%ebp), %ymm1, %ymm1
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+entry:
+ %cmp = fcmp ogt <8 x double> %a, %b
+ %cond = select <8 x i1> %cmp, <8 x double> %a, <8 x double> %b
+ ret <8 x double> %cond
+}
More information about the llvm-commits
mailing list