[llvm] [AMDGPU] Adopt new lowering sequence for `fdiv16` (PR #109295)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 09:06:38 PDT 2024
================
@@ -10606,19 +10606,39 @@ SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
return FastLowered;
SDLoc SL(Op);
- SDValue Src0 = Op.getOperand(0);
- SDValue Src1 = Op.getOperand(1);
-
- SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
- SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
-
- SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
- SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // a32.u = opx(V_CVT_F32_F16, a.u);
+ // b32.u = opx(V_CVT_F32_F16, b.u);
+ // r32.u = opx(V_RCP_F32, b32.u);
+ // q32.u = opx(V_MUL_F32, a32.u, r32.u);
+ // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u);
+ // q32.u = opx(V_MAD_F32, e32.u, r32.u, q32.u);
+ // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u);
+ // tmp.u = opx(V_MUL_F32, e32.u, r32.u);
+ // tmp.u = opx(V_AND_B32, tmp.u, 0xff800000);
+ // q32.u = opx(V_ADD_F32, tmp.u, q32.u);
+ // q16.u = opx(V_CVT_F16_F32, q32.u);
+ // q16.u = opx(V_DIV_FIXUP_F16, q16.u, b.u, a.u);
+
+ SDValue LHSExt = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, LHS);
+ SDValue RHSExt = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, RHS);
+ SDValue NegRHSExt = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHSExt);
+ SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, RHSExt);
+ SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHSExt, Rcp);
+ SDValue Err = DAG.getNode(ISD::FMA, SL, MVT::f32, NegRHSExt, Quot, LHSExt);
+ Quot = DAG.getNode(ISD::FMA, SL, MVT::f32, Err, Rcp, Quot);
+ Err = DAG.getNode(ISD::FMA, SL, MVT::f32, NegRHSExt, Quot, LHSExt);
+ SDValue Tmp = DAG.getNode(ISD::FMUL, SL, MVT::f32, Err, Rcp);
+ SDValue TmpCast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Tmp);
+ TmpCast = DAG.getNode(ISD::AND, SL, MVT::i32, TmpCast,
----------------
shiltian wrote:
It doesn't look equivalent. What `v_frexp_mant_f32` does is:
```
if (S0 == inf || S0 == -inf || S0 == nan)
return S0;
else
return mantissa(S0);
```
The difference is that when `S0 == nan`, `v_frexp_mant_f32` returns `S0` unchanged, i.e. a value with a non-zero fraction, whereas this `&` sequence clears the fraction bits and so returns a value with a zero fraction, which is effectively `+-inf`.
https://github.com/llvm/llvm-project/pull/109295
More information about the llvm-commits
mailing list