[llvm] 9dff14b - [AMDGPU] Add support for GFX11 hazards

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 16 00:23:01 PDT 2022


Author: Jay Foad
Date: 2022-06-16T08:15:21+01:00
New Revision: 9dff14be9ed6cf8da651bd675a839cde0d4294a2

URL: https://github.com/llvm/llvm-project/commit/9dff14be9ed6cf8da651bd675a839cde0d4294a2
DIFF: https://github.com/llvm/llvm-project/commit/9dff14be9ed6cf8da651bd675a839cde0d4294a2.diff

LOG: [AMDGPU] Add support for GFX11 hazards

Add support for partial stall over EXEC hazard and trans use hazard.

Differential Revision: https://reviews.llvm.org/D127872

Added: 
    llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
    llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0b5185ef4ba10..8adae23a43d5f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -424,8 +424,52 @@ void GCNHazardRecognizer::RecedeCycle() {
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
+typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
+
 typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
 
+// Search backwards from \p I in \p MBB, then in its unvisited predecessors.
+template <typename StateT>
+static bool
+hasHazard(StateT State, // By value: each recursive call works on its own copy.
+          function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
+          function_ref<void(StateT &, const MachineInstr &)> UpdateState,
+          const MachineBasicBlock *MBB,
+          MachineBasicBlock::const_reverse_instr_iterator I,
+          DenseSet<const MachineBasicBlock *> &Visited) {
+  for (auto E = MBB->instr_rend(); I != E; ++I) {
+    // No need to look at parent BUNDLE instructions.
+    if (I->isBundle())
+      continue;
+
+    switch (IsHazard(State, *I)) {
+    case HazardFound:
+      return true;
+    case HazardExpired:
+      return false;
+    default:
+      // NoHazardFound: continue the search with the next instruction.
+      break;
+    }
+
+    if (I->isInlineAsm() || I->isMetaInstruction())
+      continue;
+
+    UpdateState(State, *I);
+  }
+
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    if (hasHazard(State, IsHazard, UpdateState, Pred, Pred->instr_rbegin(),
+                  Visited))
+      return true;
+  }
+
+  return false;
+}
+
 // Returns a minimum wait states since \p I walking all predecessors.
 // Only scans until \p IsExpired does not return true.
 // Can only be run in a hazard recognizer mode.
@@ -1031,6 +1075,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixSMEMtoVectorWriteHazards(MI);
   fixVcmpxExecWARHazard(MI);
   fixLdsBranchVmemWARHazard(MI);
+  fixVALUPartialForwardingHazard(MI);
+  fixVALUTransUseHazard(MI);
 }
 
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -1320,6 +1366,233 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
   return true;
 }
 
+bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
+  if (!ST.isWave64())
+    return false;
+  if (!ST.hasVALUPartialForwardingHazard())
+    return false;
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  SmallSetVector<Register, 4> SrcVGPRs;
+
+  for (const MachineOperand &Use : MI->explicit_uses()) {
+    if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+      SrcVGPRs.insert(Use.getReg());
+  }
+
+  // Only applies with >= 2 unique VGPR sources
+  if (SrcVGPRs.size() <= 1)
+    return false;
+
+  // Look for the following pattern:
+  //   Va <- VALU [PreExecPos]
+  //   intv1
+  //   Exec <- SALU [ExecPos]
+  //   intv2
+  //   Vb <- VALU [PostExecPos]
+  //   intv3
+  //   MI Va, Vb (WaitState = 0)
+  //
+  // Where:
+  // intv1 + intv2 <= 2 VALUs
+  // intv3 <= 4 VALUs
+  //
+  // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
+
+  const int Intv1plus2MaxVALUs = 2;
+  const int Intv3MaxVALUs = 4;
+  const int IntvMaxVALUs = 6;
+  const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
+
+  struct StateType {
+    SmallDenseMap<Register, int, 4> DefPos; // VALU count at each source's def.
+    int ExecPos = std::numeric_limits<int>::max(); // max() == not seen yet.
+    int VALUs = 0; // Number of VALUs walked past so far.
+  };
+
+  StateType State;
+
+  // This overloads expiry testing with all the hazard detection
+  auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+    // Too many VALUs have been walked past; stop searching this path
+    if (State.VALUs > NoHazardVALUWaitStates)
+      return HazardExpired;
+
+    // Instructions which cause va_vdst==0 expire hazard
+    if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+        SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+         I.getOperand(0).getImm() == 0x0fff))
+      return HazardExpired;
+
+    // Record first-seen VALU defs of sources, then the first SALU EXEC write
+    bool Changed = false;
+    if (SIInstrInfo::isVALU(I)) {
+      for (Register Src : SrcVGPRs) {
+        if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
+          State.DefPos[Src] = State.VALUs;
+          Changed = true;
+        }
+      }
+    } else if (SIInstrInfo::isSALU(I)) {
+      if (State.ExecPos == std::numeric_limits<int>::max()) {
+        if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
+          State.ExecPos = State.VALUs;
+          Changed = true;
+        }
+      }
+    }
+
+    // Early expiration: too many VALUs in intv3
+    if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
+      return HazardExpired;
+
+    // Only evaluate state if something changed
+    if (!Changed)
+      return NoHazardFound;
+
+    // Determine positions of VALUs pre/post exec change
+    if (State.ExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    int PreExecPos = std::numeric_limits<int>::max();
+    int PostExecPos = std::numeric_limits<int>::max();
+
+    for (auto Entry : State.DefPos) {
+      int DefVALUs = Entry.second;
+      if (DefVALUs != std::numeric_limits<int>::max()) {
+        if (DefVALUs >= State.ExecPos)
+          PreExecPos = std::min(PreExecPos, DefVALUs);
+        else if (DefVALUs < State.ExecPos)
+          PostExecPos = std::min(PostExecPos, DefVALUs);
+      }
+    }
+
+    // Need a VALU post exec change
+    if (PostExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv3?
+    int Intv3VALUs = PostExecPos;
+    if (Intv3VALUs > Intv3MaxVALUs)
+      return HazardExpired;
+
+    // Too many VALUs in intv2?
+    int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
+    if (Intv2VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    // Need a VALU pre exec change
+    if (PreExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv1?
+    int Intv1VALUs = PreExecPos - State.ExecPos;
+    if (Intv1VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    // Too many VALUs in intv1 + intv2?
+    if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    return HazardFound;
+  };
+  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+    if (SIInstrInfo::isVALU(MI))
+      State.VALUs += 1;
+  };
+
+  DenseSet<const MachineBasicBlock *> Visited;
+  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+                            std::next(MI->getReverseIterator()), Visited))
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0x0fff); // Same mask the expiry check above treats as va_vdst==0.
+
+  return true;
+}
+
+bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
+  if (!ST.hasVALUTransUseHazard())
+    return false;
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  SmallSet<Register, 4> SrcVGPRs;
+
+  for (const MachineOperand &Use : MI->explicit_uses()) {
+    if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+      SrcVGPRs.insert(Use.getReg());
+  }
+
+  // Look for the following pattern:
+  //   Va <- TRANS VALU
+  //   intv
+  //   MI Va (WaitState = 0)
+  //
+  // Where:
+  // intv <= 5 VALUs / 1 TRANS
+  //
+  // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
+
+  const int IntvMaxVALUs = 5;
+  const int IntvMaxTRANS = 1;
+
+  struct StateType {
+    int VALUs = 0; // Number of VALUs walked past so far.
+    int TRANS = 0; // Number of TRANS instructions walked past so far.
+  };
+
+  StateType State;
+
+  // This overloads expiry testing with all the hazard detection
+  auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+    // Too many VALUs or TRANS instructions have been walked past
+    if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
+      return HazardExpired;
+
+    // Instructions which cause va_vdst==0 expire hazard
+    if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+        SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+         I.getOperand(0).getImm() == 0x0fff))
+      return HazardExpired;
+
+    // A TRANS instruction writing any source VGPR of MI is the hazard
+    if (SIInstrInfo::isTRANS(I)) {
+      for (Register Src : SrcVGPRs) {
+        if (I.modifiesRegister(Src, &TRI)) {
+          return HazardFound;
+        }
+      }
+    }
+
+    return NoHazardFound;
+  };
+  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+    if (SIInstrInfo::isVALU(MI))
+      State.VALUs += 1;
+    if (SIInstrInfo::isTRANS(MI))
+      State.TRANS += 1;
+  };
+
+  DenseSet<const MachineBasicBlock *> Visited;
+  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+                            std::next(MI->getReverseIterator()), Visited))
+    return false;
+
+  // Hazard is observed - insert a wait on va_vdst counter to ensure hazard is
+  // avoided (mask 0x0fff achieves this).
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0x0fff);
+
+  return true;
+}
+
 int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
   int NSAtoVMEMWaitStates = 1;
 

diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 5700cd80d5666..1e12e0820f2ef 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -96,6 +96,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
   bool fixVcmpxExecWARHazard(MachineInstr *MI);
   bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+  bool fixVALUPartialForwardingHazard(MachineInstr *MI);
+  bool fixVALUTransUseHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 8fc3d04e4248a..03f6a5a57a18a 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -3117,6 +3117,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
 ; GFX1164-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1164-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1164-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1164-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1164-NEXT:    v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
 ; GFX1164-NEXT:    v_sub_co_u32 v0, vcc, s2, v3
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000

diff  --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
new file mode 100644
index 0000000000000..5aff87f5cb213
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -0,0 +1,399 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name:            partial_forwarding_1_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_1_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: S_WAITCNT_DEPCTR 4095
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_2_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_2_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $sgpr0 = S_MOV_B32 0
+    ; GCN: $sgpr1 = S_MOV_B32 0
+    ; GCN: $sgpr2 = S_MOV_B32 0
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $sgpr3 = S_MOV_B32 0
+    ; GCN: $sgpr4 = S_MOV_B32 0
+    ; GCN: $sgpr5 = S_MOV_B32 0
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $sgpr6 = S_MOV_B32 0
+    ; GCN: $sgpr7 = S_MOV_B32 0
+    ; GCN: $sgpr8 = S_MOV_B32 0
+    ; GCN: $sgpr9 = S_MOV_B32 0
+    ; GCN: $sgpr10 = S_MOV_B32 0
+    ; GCN: S_WAITCNT_DEPCTR 4095
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $exec = S_MOV_B64 -1
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    $sgpr9 = S_MOV_B32 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_3_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_3_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: S_WAITCNT_DEPCTR 4095
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_3_no_hazard_1
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_3_no_hazard_1
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_3_no_hazard_2
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_3_no_hazard_2
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_3_no_hazard_3
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_3_no_hazard_3
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_4_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_4_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: S_WAITCNT_DEPCTR 4095
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_4_no_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_4_no_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_5_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_5_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: S_WAITCNT_DEPCTR 4095
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_5_no_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: partial_forwarding_5_no_hazard
+    ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $exec = S_MOV_B64 -1
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_branching_1a
+body:            |
+  ; GCN-LABEL: name: partial_forwarding_branching_1a
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $exec = S_MOV_B64 -1
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.2:
+  ; GCN:   $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   S_WAITCNT_DEPCTR 4095
+  ; GCN:   $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    S_BRANCH %bb.2
+  bb.1:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.2
+  bb.2:
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+name:            partial_forwarding_branching_1b
+body:            |
+  ; GCN-LABEL: name: partial_forwarding_branching_1b
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $exec = S_MOV_B64 -1
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.2:
+  ; GCN:   $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   S_WAITCNT_DEPCTR 4095
+  ; GCN:   $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.2
+  bb.1:
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = S_MOV_B64 -1
+    S_BRANCH %bb.2
+  bb.2:
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir
new file mode 100644
index 0000000000000..26f2b126d0f94
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir
@@ -0,0 +1,334 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+# V_SQRT_F32 writes $vgpr1, a single V_MOV_B32 follows, and then V_ADD_F32
+# reads $vgpr1. The expected output has S_WAITCNT_DEPCTR 4095 inserted
+# immediately before the V_ADD_F32.
+name:            trans_use_1_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_1_hazard
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Same shape as trans_use_1_hazard, but the input already contains
+# S_WAITCNT_DEPCTR 4095 directly after the V_SQRT_F32. The expected output is
+# identical to the input: no further wait is added before the V_ADD_F32 that
+# reads $vgpr1.
+name:            trans_use_1_no_hazard_1
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_1_no_hazard_1
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    S_WAITCNT_DEPCTR 4095
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1 and V_ADD_F32 reads it after eleven S_MOV_B32
+# instructions and one V_MOV_B32 (six scalar moves, the vector move, then five
+# more scalar moves). The expected output still has S_WAITCNT_DEPCTR 4095
+# inserted immediately before the V_ADD_F32.
+name:            trans_use_2_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_2_hazard
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    $sgpr9 = S_MOV_B32 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1 and V_ADD_F32 reads it after five intervening
+# V_MOV_B32 instructions. The expected output has S_WAITCNT_DEPCTR 4095
+# inserted immediately before the V_ADD_F32. Compare trans_use_3_no_hazard_1,
+# which has six intervening moves and expects no insertion.
+name:            trans_use_3_hazard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_3_hazard
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1 and V_ADD_F32 reads it after six intervening
+# V_MOV_B32 instructions (one more than trans_use_3_hazard). The expected
+# output is identical to the input: no S_WAITCNT_DEPCTR is inserted.
+name:            trans_use_3_no_hazard_1
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_3_no_hazard_1
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1 and V_ADD_F32 reads it after seven intervening
+# V_MOV_B32 instructions; the write of the second V_ADD_F32 source, $vgpr2, is
+# the third of those moves. The expected output is identical to the input: no
+# S_WAITCNT_DEPCTR is inserted.
+name:            trans_use_3_no_hazard_2
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_3_no_hazard_2
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1, then two more V_SQRT_F32 instructions write
+# unrelated registers ($vgpr10, $vgpr12) before V_ADD_F32 reads $vgpr1. The
+# expected output is identical to the input: no S_WAITCNT_DEPCTR is inserted.
+name:            trans_use_3_no_hazard_3
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_3_no_hazard_3
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr12 = V_SQRT_F32_e32 $vgpr13, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+    $vgpr12 = V_SQRT_F32_e32 $vgpr13, implicit $mode, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Two V_SQRT_F32 instructions write $vgpr1 and $vgpr3; two V_ADD_F32
+# instructions then read $vgpr1 and $vgpr3 in that order. The expected output
+# contains exactly one S_WAITCNT_DEPCTR 4095, placed before the first
+# V_ADD_F32; nothing is inserted before the second.
+name:            trans_use_4_one_depctr_1
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_4_one_depctr_1
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr4, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr6, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr4, implicit $mode, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr6, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Variant of trans_use_4_one_depctr_1 with the consumers swapped: the first
+# V_ADD_F32 reads $vgpr3 (the later V_SQRT_F32 result) and the second reads
+# $vgpr1. The expected output again contains exactly one S_WAITCNT_DEPCTR 4095,
+# placed before the first V_ADD_F32.
+name:            trans_use_4_one_depctr_2
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_4_one_depctr_2
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr1, $vgpr6, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+    $vgpr5 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+    $vgpr7 = V_ADD_F32_e32 $vgpr1, $vgpr6, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# V_SQRT_F32 writes $vgpr1; one further V_SQRT_F32 (writing $vgpr10) and one
+# V_MOV_B32 follow before V_ADD_F32 reads $vgpr1. The expected output has
+# S_WAITCNT_DEPCTR 4095 inserted immediately before the V_ADD_F32.
+name:            trans_use_4
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: trans_use_4
+    ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+    ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Cross-block case. bb.0 ends with V_SQRT_F32 writing $vgpr1 and branches to
+# bb.2; bb.1 contains five V_MOV_B32 instructions and also branches to bb.2.
+# bb.2 executes one V_MOV_B32 and then V_ADD_F32 reads $vgpr1. The expected
+# output has S_WAITCNT_DEPCTR 4095 inserted in bb.2 immediately before the
+# V_ADD_F32; bb.0 and bb.1 are unchanged.
+name:            trans_use_branching_1a
+body:            |
+  ; GCN-LABEL: name: trans_use_branching_1a
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr33 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   S_WAITCNT_DEPCTR 4095
+  ; GCN-NEXT:   $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    S_BRANCH %bb.2
+  bb.1:
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr32 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr33 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.2
+  bb.2:
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Cross-block case where only one predecessor already waits. bb.0 ends with
+# V_SQRT_F32 writing $vgpr1 and no wait; bb.1 has two V_MOV_B32 instructions
+# followed by an existing S_WAITCNT_DEPCTR 4095. Both branch to bb.2, where
+# V_ADD_F32 reads $vgpr1 after one V_MOV_B32. The expected output keeps the
+# wait in bb.1 and still inserts S_WAITCNT_DEPCTR 4095 in bb.2 immediately
+# before the V_ADD_F32.
+name:            trans_use_branching_1b
+body:            |
+  ; GCN-LABEL: name: trans_use_branching_1b
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   S_WAITCNT_DEPCTR 4095
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   S_WAITCNT_DEPCTR 4095
+  ; GCN-NEXT:   $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    S_BRANCH %bb.2
+  bb.1:
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+    S_WAITCNT_DEPCTR 4095
+    S_BRANCH %bb.2
+  bb.2:
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# Cross-block case where the producing block already waits. bb.0 has
+# V_SQRT_F32 writing $vgpr1 followed by an existing S_WAITCNT_DEPCTR 4095;
+# bb.1 has only two V_MOV_B32 instructions. Both branch to bb.2, where
+# V_ADD_F32 reads $vgpr1 after one V_MOV_B32. The expected output is identical
+# to the input: no additional wait is inserted in bb.2.
+name:            trans_use_branching_1c_no_hazard_1
+body:            |
+  ; GCN-LABEL: name: trans_use_branching_1c_no_hazard_1
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_WAITCNT_DEPCTR 4095
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+    S_WAITCNT_DEPCTR 4095
+    S_BRANCH %bb.2
+  bb.1:
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.2
+  bb.2:
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+    S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list