[llvm] 89d48cc - AMDGPU: Fix not emitting nofpexcept on fdiv expansion
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 1 11:11:05 PDT 2020
Author: Matt Arsenault
Date: 2020-06-01T14:10:26-04:00
New Revision: 89d48ccabe6a950369b2bd922b1d8e987b856ac7
URL: https://github.com/llvm/llvm-project/commit/89d48ccabe6a950369b2bd922b1d8e987b856ac7
DIFF: https://github.com/llvm/llvm-project/commit/89d48ccabe6a950369b2bd922b1d8e987b856ac7.diff
LOG: AMDGPU: Fix not emitting nofpexcept on fdiv expansion
In this awkward case, we have to emit custom pseudo-constrained FP
wrappers. InstrEmitter concludes that since a mayRaiseFPException
instruction had a chain, it can't add nofpexcept.
Test deferred until mayRaiseFPException is really set on everything.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 08effeea1812..ffbf8529fdeb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7907,9 +7907,10 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
}
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
- EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
+ EVT VT, SDValue A, SDValue B, SDValue GlueChain,
+ SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
- return DAG.getNode(Opcode, SL, VT, A, B);
+ return DAG.getNode(Opcode, SL, VT, A, B, Flags);
}
assert(GlueChain->getNumValues() == 3);
@@ -7922,15 +7923,16 @@ static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
break;
}
- return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B,
- GlueChain.getValue(2));
+ return DAG.getNode(Opcode, SL, VTList,
+ {GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
+ Flags);
}
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue C,
- SDValue GlueChain) {
+ SDValue GlueChain, SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
- return DAG.getNode(Opcode, SL, VT, A, B, C);
+ return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
}
assert(GlueChain->getNumValues() == 3);
@@ -7943,8 +7945,9 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
break;
}
- return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B, C,
- GlueChain.getValue(2));
+ return DAG.getNode(Opcode, SL, VTList,
+ {GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
+ Flags);
}
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
@@ -8018,6 +8021,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
+ // The selection matcher assumes anything with a chain selecting to a
+ // mayRaiseFPException machine instruction. Since we're introducing a chain
+ // here, we need to explicitly report nofpexcept for the regular fdiv
+ // lowering.
+ SDNodeFlags Flags = Op->getFlags();
+ Flags.setNoFPExcept(true);
+
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -8027,15 +8037,15 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
- RHS, RHS, LHS);
+ {RHS, RHS, LHS}, Flags);
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
- LHS, RHS, LHS);
+ {LHS, RHS, LHS}, Flags);
// Denominator is scaled to not be denormal, so using rcp is ok.
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
- DenominatorScaled);
+ DenominatorScaled, Flags);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
- DenominatorScaled);
+ DenominatorScaled, Flags);
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
@@ -8045,6 +8055,10 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
+ // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
+ // lowering. The chain dependence is insufficient, and we need glue. We do
+ // not need the glue variants in a strictfp function.
+
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDNode *EnableDenorm;
@@ -8072,21 +8086,22 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
- ApproxRcp, One, NegDivScale0);
+ ApproxRcp, One, NegDivScale0, Flags);
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
- ApproxRcp, Fma0);
+ ApproxRcp, Fma0, Flags);
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
- Fma1, Fma1);
+ Fma1, Fma1, Flags);
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
- NumeratorScaled, Mul);
+ NumeratorScaled, Mul, Flags);
- SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
+ SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
+ Fma2, Fma1, Mul, Fma2, Flags);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
- NumeratorScaled, Fma3);
+ NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
SDNode *DisableDenorm;
@@ -8113,9 +8128,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue Scale = NumeratorScaled.getValue(1);
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
- Fma4, Fma1, Fma3, Scale);
+ {Fma4, Fma1, Fma3, Scale}, Flags);
- return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
More information about the llvm-commits
mailing list