[PATCH] D77517: AMDGPU: Use DAG patterns for div_fmas

Sun Apr 5 17:38:07 PDT 2020

arsenm created this revision.
arsenm added reviewers: rampitec, kerbowa.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
arsenm added a child revision: D77518: AMDGPU/GlobalISel: Fix llvm.amdgcn.div.fmas.ll.

https://reviews.llvm.org/D77517

Files:
  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  llvm/lib/Target/AMDGPU/VOP3Instructions.td


Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td
===================================================================

--- llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -716,6 +716,24 @@
     V_PERMLANEX16_B32>;
 } // End SubtargetPredicate = isGFX10Plus
 
+class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
+  (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                      (VOP3Mods vt:$src1, i32:$src1_modifiers),
+                      (VOP3Mods vt:$src2, i32:$src2_modifiers),
+                      (i1 CondReg)),
+  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2)
+>;
+
+let WaveSizePredicate = isWave64 in {
+def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC>;
+}
+
+let WaveSizePredicate = isWave32 in {
+def : DivFmasPat<f32, V_DIV_FMAS_F32, VCC_LO>;
+def : DivFmasPat<f64, V_DIV_FMAS_F64, VCC_LO>;
+}
+
 //===----------------------------------------------------------------------===//
 // Integer Clamp Patterns
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -278,7 +278,6 @@
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
   void SelectDIV_SCALE(SDNode *N);
-  void SelectDIV_FMAS(SDNode *N);
   void SelectMAD_64_32(SDNode *N);
   void SelectFMA_W_CHAIN(SDNode *N);
   void SelectFMUL_W_CHAIN(SDNode *N);
@@ -871,10 +870,6 @@
     SelectDIV_SCALE(N);
     return;
   }
-  case AMDGPUISD::DIV_FMAS: {
-    SelectDIV_FMAS(N);
-    return;
-  }
   case AMDGPUISD::MAD_I64_I32:
   case AMDGPUISD::MAD_U64_U32: {
     SelectMAD_64_32(N);
@@ -1128,35 +1123,6 @@
   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 }
 
-void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
-  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
-  const SIRegisterInfo *TRI = ST->getRegisterInfo();
-
-  SDLoc SL(N);
-  EVT VT = N->getValueType(0);
-
-  assert(VT == MVT::f32 || VT == MVT::f64);
-
-  unsigned Opc
-    = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
-
-  SDValue CarryIn = N->getOperand(3);
-  // V_DIV_FMAS implicitly reads VCC.
-  SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
-                                     TRI->getVCC(), CarryIn, SDValue());
-
-  SDValue Ops[10];
-
-  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
-  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
-  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-
-  Ops[8] = VCC;
-  Ops[9] = VCC.getValue(1);
-
-  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
-}
-
 // We need to handle this here because tablegen doesn't support matching
 // instructions with multiple outputs.
 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77517.255211.patch
Type: text/x-patch
Size: 3045 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200406/4af38ef5/attachment-0001.bin>