[PATCH] D155443: AMDGPU: Preserve flags in fdiv_fast lowering
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 04:44:12 PDT 2023
arsenm created this revision.
arsenm added reviewers: AMDGPU, foad, Pierre-vh, rampitec.
Herald added subscribers: StephenFan, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
We were dropping the flags and thus blocking contract into potential
fadd users. GlobalISel was already preserving the flags here.
https://reviews.llvm.org/D155443
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/fdiv.ll
Index: llvm/test/CodeGen/AMDGPU/fdiv.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -3828,8 +3828,7 @@
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f32_e32 v0, v3, v0
-; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_daz_25ulp_contractable_user:
@@ -5087,8 +5086,7 @@
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
-; GFX11-NEXT: v_mul_f32_e32 v0, v3, v0
-; GFX11-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user:
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9271,11 +9271,12 @@
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
+ SDNodeFlags Flags = Op->getFlags();
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+ SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS, Flags);
const APFloat K0Val(0x1p+96f);
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
@@ -9290,17 +9291,16 @@
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
- SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One, Flags);
- // TODO: Should this propagate fast-math-flags?
- r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+ r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3, Flags);
// rcp does not support denormals.
- SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+ SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1, Flags);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0, Flags);
- return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
+ return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul, Flags);
}
// Returns immediate value for setting the F32 denorm mode when using the
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155443.540955.patch
Type: text/x-patch
Size: 2579 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230717/8a83a92f/attachment.bin>
More information about the llvm-commits
mailing list