[llvm] [AMDGPU] Adopt new lowering sequence for `fdiv16` (PR #109295)

Sun Sep 22 00:47:55 PDT 2024

================
@@ -10606,19 +10606,40 @@ SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
     return FastLowered;
 
   SDLoc SL(Op);
-  SDValue Src0 = Op.getOperand(0);
-  SDValue Src1 = Op.getOperand(1);
-
-  SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
-  SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
-
-  SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
-  SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
-
-  SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
-  SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
 
-  return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
+  // a32.u = opx(V_CVT_F32_F16, a.u);
+  // b32.u = opx(V_CVT_F32_F16, b.u);
+  // r32.u = opx(V_RCP_F32, b32.u);
+  // q32.u = opx(V_MUL_F32, a32.u, r32.u);
----------------
arsenm wrote:

The comment would be more useful if it spelled out the reason for each of these instructions, rather than just restating them as opx(opcode); e.g., as in the comment on the ticket: `// err = -d*q + n`.

https://github.com/llvm/llvm-project/pull/109295