[llvm] r268247 - AMDGPU/SI: Use hazard recognizer to detect DPP hazards

Mon May 2 09:23:10 PDT 2016

Author: tstellar
Date: Mon May  2 11:23:09 2016
New Revision: 268247

URL: http://llvm.org/viewvc/llvm-project?rev=268247&view=rev
Log:
AMDGPU/SI: Use hazard recognizer to detect DPP hazards

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18603

Modified:
    llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll

Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=268247&r1=268246&r2=268247&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Mon May  2 11:23:09 2016
@@ -47,6 +47,9 @@ GCNHazardRecognizer::getHazardType(SUnit
   if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
+  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -61,6 +64,9 @@ unsigned GCNHazardRecognizer::PreEmitNoo
   if (SIInstrInfo::isVMEM(*MI))
     return std::max(0, checkVMEMHazards(MI));
 
+  if (SIInstrInfo::isDPP(*MI))
+    return std::max(0, checkDPPHazards(MI));
+
   return 0;
 }
 
@@ -175,3 +181,23 @@ int GCNHazardRecognizer::checkVMEMHazard
   }
   return WaitStatesNeeded;
 }
+
+int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+  // Returns how many wait states are still needed before DPP (0 if none).
+  // Check for DPP VGPR read after VALU VGPR write.
+  int DppVgprWaitStates = 2; // Wait states wanted between the write and read.
+  int WaitStatesNeeded = 0;  // Running maximum over all VGPR uses of DPP.
+
+  for (const MachineOperand &Use : DPP->uses()) {
+    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
+      continue; // Only register operands that TRI reports as VGPRs matter.
+    int WaitStatesNeededForUse =
+        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+  // Never negative: the result starts at 0 and is only raised by std::max.
+  return WaitStatesNeeded;
+}

Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h?rev=268247&r1=268246&r2=268247&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h Mon May  2 11:23:09 2016
@@ -40,6 +40,7 @@ class GCNHazardRecognizer final : public
 
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
+  int checkDPPHazards(MachineInstr *DPP);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=268247&r1=268246&r2=268247&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Mon May  2 11:23:09 2016
@@ -127,18 +127,6 @@ private:
   /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
   void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
 
-  /// \param DPP The DPP instruction
-  /// \param SearchI The iterator to start look for hazards.
-  /// \param SearchMBB The basic block we are operating on.
-  /// \param WaitStates Then number of wait states that need to be inserted
-  ///                    When a hazard is detected.
-  void insertDPPWaitStates(MachineBasicBlock::iterator DPP,
-                           MachineBasicBlock::reverse_iterator SearchI,
-                           MachineBasicBlock *SearchMBB,
-                           unsigned WaitStates);
-
-  void insertDPPWaitStates(MachineBasicBlock::iterator DPP);
-
   /// Return true if there are LGKM instrucitons that haven't been waited on
   /// yet.
   bool hasOutstandingLGKM() const;
@@ -522,45 +510,6 @@ void SIInsertWaits::handleSendMsg(Machin
   }
 }
 
-void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP,
-                                        MachineBasicBlock::reverse_iterator SearchI,
-                                        MachineBasicBlock *SearchMBB,
-                                        unsigned WaitStates) {
-
-  MachineBasicBlock::reverse_iterator E = SearchMBB->rend();
-
-  for (; WaitStates > 0; --WaitStates, ++SearchI) {
-
-    // If we have reached the start of the block, we need to check predecessors.
-    if (SearchI == E) {
-      for (MachineBasicBlock *Pred : SearchMBB->predecessors()) {
-        // We only need to check fall-through blocks.  Branch instructions
-        // give us enough wait states.
-        if (Pred->getFirstTerminator() == Pred->end()) {
-          insertDPPWaitStates(DPP, Pred->rbegin(), Pred, WaitStates);
-          break;
-        }
-      }
-      return;
-    }
-
-    for (MachineOperand &Op : SearchI->operands()) {
-      if (!Op.isReg() || !Op.isDef())
-        continue;
-
-      if (DPP->readsRegister(Op.getReg(), TRI)) {
-        TII->insertWaitStates(*DPP->getParent(), DPP, WaitStates);
-        return;
-      }
-    }
-  }
-}
-
-void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP) {
-  MachineBasicBlock::reverse_iterator I(DPP);
-  insertDPPWaitStates(DPP, I, DPP->getParent(), 2);
-}
-
 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
 // around other non-memory instructions.
 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -630,10 +579,6 @@ bool SIInsertWaits::runOnMachineFunction
         }
       }
 
-      if (TII->isDPP(*I)) {
-        insertDPPWaitStates(I);
-      }
-
       // Record pre-existing, explicitly requested waits
       if (I->getOpcode() == AMDGPU::S_WAITCNT) {
         handleExistingWait(*I);

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll?rev=268247&r1=268246&r2=268247&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll Mon May  2 11:23:09 2016
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
 
 ; FIXME: The register allocator / scheduler should be able to avoid these hazards.
 
@@ -26,7 +27,10 @@ define void @dpp_wait_states(i32 addrspa
 }
 
 ; VI-LABEL: {{^}}dpp_first_in_bb:
-; VI: s_nop 1
+; VI: ; %endif
+; VI-OPT: s_mov_b32
+; VI-OPT: s_mov_b32
+; VI-NOOPT: s_nop 1
 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
 ; VI: s_nop 1
 ; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0