[llvm] 9dff14b - [AMDGPU] Add support for GFX11 hazards
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 16 00:23:01 PDT 2022
Author: Jay Foad
Date: 2022-06-16T08:15:21+01:00
New Revision: 9dff14be9ed6cf8da651bd675a839cde0d4294a2
URL: https://github.com/llvm/llvm-project/commit/9dff14be9ed6cf8da651bd675a839cde0d4294a2
DIFF: https://github.com/llvm/llvm-project/commit/9dff14be9ed6cf8da651bd675a839cde0d4294a2.diff
LOG: [AMDGPU] Add support for GFX11 hazards
Add support for partial stall over EXEC hazard and trans use hazard.
Differential Revision: https://reviews.llvm.org/D127872
Added:
llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0b5185ef4ba10..8adae23a43d5f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -424,8 +424,52 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
+/// Verdict a hazard-search callback returns for a single instruction; it tells
+/// hasHazard() how to proceed.
+enum HazardFnResult {
+  HazardFound,   ///< Stop searching and report that a hazard exists.
+  HazardExpired, ///< Stop searching the current path; no hazard on it.
+  NoHazardFound  ///< Nothing decided yet; keep scanning backwards.
+};
+
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
+/// Scan backwards for a hazard, starting at \p I inside \p MBB and continuing
+/// into every predecessor block that is not yet in \p Visited.
+///
+/// \p State is taken by value, so each recursive call into a predecessor
+/// starts from the state reached at the top of \p MBB.
+///
+/// \param IsHazard    Classifies each instruction; may modify the state.
+/// \param UpdateState Advances the state past an instruction that did not end
+///                    the search.
+/// \returns true if \p IsHazard reported HazardFound on some path.
+template <typename StateT>
+static bool
+hasHazard(StateT State,
+          function_ref<HazardFnResult(StateT &, const MachineInstr &)> IsHazard,
+          function_ref<void(StateT &, const MachineInstr &)> UpdateState,
+          const MachineBasicBlock *MBB,
+          MachineBasicBlock::const_reverse_instr_iterator I,
+          DenseSet<const MachineBasicBlock *> &Visited) {
+  for (const auto BlockEnd = MBB->instr_rend(); I != BlockEnd; ++I) {
+    const MachineInstr &Inst = *I;
+
+    // A parent BUNDLE instruction is skipped without consulting IsHazard.
+    if (Inst.isBundle())
+      continue;
+
+    const HazardFnResult Verdict = IsHazard(State, Inst);
+    if (Verdict == HazardFound)
+      return true;
+    if (Verdict == HazardExpired)
+      return false;
+
+    // Inline asm and meta instructions are shown to IsHazard above, but they
+    // never advance the state.
+    if (!Inst.isInlineAsm() && !Inst.isMetaInstruction())
+      UpdateState(State, Inst);
+  }
+
+  // The block is exhausted without a verdict: continue in each predecessor
+  // that has not been entered before.
+  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+    const bool FirstVisit = Visited.insert(Pred).second;
+    if (FirstVisit && hasHazard(State, IsHazard, UpdateState, Pred,
+                                Pred->instr_rbegin(), Visited))
+      return true;
+  }
+
+  return false;
+}
+
// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
@@ -1031,6 +1075,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixSMEMtoVectorWriteHazards(MI);
fixVcmpxExecWARHazard(MI);
fixLdsBranchVmemWARHazard(MI);
+ fixVALUPartialForwardingHazard(MI);
+ fixVALUTransUseHazard(MI);
}
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -1320,6 +1366,233 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
return true;
}
+/// Guard \p MI against the VALU partial forwarding hazard.
+///
+/// Applies only in wave64 on subtargets that report
+/// hasVALUPartialForwardingHazard(), and only when \p MI is a VALU instruction
+/// that reads at least two distinct VGPRs through its explicit operands.
+///
+/// \returns true if an S_WAITCNT_DEPCTR was inserted before \p MI.
+bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
+  if (!ST.isWave64())
+    return false;
+  if (!ST.hasVALUPartialForwardingHazard())
+    return false;
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  SmallSetVector<Register, 4> SrcVGPRs;
+
+  for (const MachineOperand &Use : MI->explicit_uses()) {
+    if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+      SrcVGPRs.insert(Use.getReg());
+  }
+
+  // Only applies with >= 2 unique VGPR sources
+  if (SrcVGPRs.size() <= 1)
+    return false;
+
+  // Look for the following pattern:
+  //   Va <- VALU [PreExecPos]
+  //   intv1
+  //   Exec <- SALU [ExecPos]
+  //   intv2
+  //   Vb <- VALU [PostExecPos]
+  //   intv3
+  //   MI Va, Vb (WaitState = 0)
+  //
+  // Where:
+  //   intv1 + intv2 <= 2 VALUs
+  //   intv3 <= 4 VALUs
+  //
+  // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
+
+  const int Intv1plus2MaxVALUs = 2;
+  const int Intv3MaxVALUs = 4;
+  const int IntvMaxVALUs = 6;
+  const int NoHazardVALUWaitStates = IntvMaxVALUs + 2;
+
+  // Positions are measured in VALU instructions seen so far while walking
+  // backwards from MI, so a smaller position is closer to MI.
+  struct StateType {
+    // For each source VGPR, the position of the first VALU write found.
+    SmallDenseMap<Register, int, 4> DefPos;
+    // Position of the first SALU write to EXEC found after at least one def
+    // was recorded; max() while no such write has been seen.
+    int ExecPos = std::numeric_limits<int>::max();
+    // Number of VALU instructions passed so far.
+    int VALUs = 0;
+  };
+
+  StateType State;
+
+  // This overloads expiry testing with all the hazard detection
+  auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+    // Too many VALU states have passed
+    if (State.VALUs > NoHazardVALUWaitStates)
+      return HazardExpired;
+
+    // Instructions which cause va_vdst==0 expire hazard
+    if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+        SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+         I.getOperand(0).getImm() == 0x0fff))
+      return HazardExpired;
+
+    // Track register writes
+    bool Changed = false;
+    if (SIInstrInfo::isVALU(I)) {
+      for (Register Src : SrcVGPRs) {
+        if (!State.DefPos.count(Src) && I.modifiesRegister(Src, &TRI)) {
+          State.DefPos[Src] = State.VALUs;
+          Changed = true;
+        }
+      }
+    } else if (SIInstrInfo::isSALU(I)) {
+      if (State.ExecPos == std::numeric_limits<int>::max()) {
+        if (!State.DefPos.empty() && I.modifiesRegister(AMDGPU::EXEC, &TRI)) {
+          State.ExecPos = State.VALUs;
+          Changed = true;
+        }
+      }
+    }
+
+    // Early expiration: too many VALUs in intv3
+    if (State.VALUs > Intv3MaxVALUs && State.DefPos.empty())
+      return HazardExpired;
+
+    // Only evaluate state if something changed
+    if (!Changed)
+      return NoHazardFound;
+
+    // Determine positions of VALUs pre/post exec change
+    if (State.ExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    int PreExecPos = std::numeric_limits<int>::max();
+    int PostExecPos = std::numeric_limits<int>::max();
+
+    // A def recorded at or beyond ExecPos was reached after the EXEC write in
+    // this backwards walk, i.e. it precedes the EXEC write in program order.
+    // Every other def sits between the EXEC write and MI.
+    for (const auto &Entry : State.DefPos) {
+      const int DefVALUs = Entry.second;
+      if (DefVALUs >= State.ExecPos)
+        PreExecPos = std::min(PreExecPos, DefVALUs);
+      else
+        PostExecPos = std::min(PostExecPos, DefVALUs);
+    }
+
+    // Need a VALU def after the exec change
+    if (PostExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv3?
+    int Intv3VALUs = PostExecPos;
+    if (Intv3VALUs > Intv3MaxVALUs)
+      return HazardExpired;
+
+    // Too many VALUs in intv2?
+    int Intv2VALUs = (State.ExecPos - PostExecPos) - 1;
+    if (Intv2VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    // Need a VALU def before the exec change
+    if (PreExecPos == std::numeric_limits<int>::max())
+      return NoHazardFound;
+
+    // Too many VALUs in intv1?
+    int Intv1VALUs = PreExecPos - State.ExecPos;
+    if (Intv1VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    // Too many VALUs in intv1 + intv2
+    if (Intv1VALUs + Intv2VALUs > Intv1plus2MaxVALUs)
+      return HazardExpired;
+
+    return HazardFound;
+  };
+  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+    if (SIInstrInfo::isVALU(MI))
+      State.VALUs += 1;
+  };
+
+  DenseSet<const MachineBasicBlock *> Visited;
+  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+                            std::next(MI->getReverseIterator()), Visited))
+    return false;
+
+  // Insert the same S_WAITCNT_DEPCTR (immediate 0x0fff) that the search above
+  // treats as expiring the hazard.
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0x0fff);
+
+  return true;
+}
+
+/// Guard \p MI against the VALU TRANS use hazard.
+///
+/// Applies on subtargets that report hasVALUTransUseHazard() when \p MI is a
+/// VALU instruction reading, through an explicit operand, a VGPR that a
+/// nearby preceding TRANS instruction writes.
+///
+/// \returns true if an S_WAITCNT_DEPCTR was inserted before \p MI.
+bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
+  if (!ST.hasVALUTransUseHazard())
+    return false;
+  if (!SIInstrInfo::isVALU(*MI))
+    return false;
+
+  SmallSet<Register, 4> SrcVGPRs;
+
+  for (const MachineOperand &Use : MI->explicit_uses()) {
+    if (Use.isReg() && TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+      SrcVGPRs.insert(Use.getReg());
+  }
+
+  // The search below can only report a hazard for a register in SrcVGPRs, so
+  // there is nothing to look for when the set is empty.
+  if (SrcVGPRs.empty())
+    return false;
+
+  // Look for the following pattern:
+  //   Va <- TRANS VALU
+  //   intv
+  //   MI Va (WaitState = 0)
+  //
+  // Where:
+  //   intv <= 5 VALUs / 1 TRANS
+  //
+  // If found, insert an appropriate S_WAITCNT_DEPCTR before MI.
+
+  const int IntvMaxVALUs = 5;
+  const int IntvMaxTRANS = 1;
+
+  // Instruction counts accumulated while walking backwards from MI.
+  struct StateType {
+    int VALUs = 0;
+    int TRANS = 0;
+  };
+
+  StateType State;
+
+  // This overloads expiry testing with all the hazard detection
+  auto IsHazardFn = [&, this](StateType &State, const MachineInstr &I) {
+    // Too many VALU or TRANS instructions have passed
+    if (State.VALUs > IntvMaxVALUs || State.TRANS > IntvMaxTRANS)
+      return HazardExpired;
+
+    // Instructions which cause va_vdst==0 expire hazard
+    if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+        SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+        (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+         I.getOperand(0).getImm() == 0x0fff))
+      return HazardExpired;
+
+    // A TRANS instruction writing any source VGPR of MI is the hazard
+    if (SIInstrInfo::isTRANS(I)) {
+      for (Register Src : SrcVGPRs) {
+        if (I.modifiesRegister(Src, &TRI))
+          return HazardFound;
+      }
+    }
+
+    return NoHazardFound;
+  };
+  auto UpdateStateFn = [](StateType &State, const MachineInstr &MI) {
+    if (SIInstrInfo::isVALU(MI))
+      State.VALUs += 1;
+    if (SIInstrInfo::isTRANS(MI))
+      State.TRANS += 1;
+  };
+
+  DenseSet<const MachineBasicBlock *> Visited;
+  if (!hasHazard<StateType>(State, IsHazardFn, UpdateStateFn, MI->getParent(),
+                            std::next(MI->getReverseIterator()), Visited))
+    return false;
+
+  // Hazard is observed - insert a wait on va_vdst counter to ensure hazard is
+  // avoided (mask 0x0fff achieves this).
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0x0fff);
+
+  return true;
+}
+
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
int NSAtoVMEMWaitStates = 1;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 5700cd80d5666..1e12e0820f2ef 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -96,6 +96,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
bool fixVcmpxExecWARHazard(MachineInstr *MI);
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+ bool fixVALUPartialForwardingHazard(MachineInstr *MI);
+ bool fixVALUTransUseHazard(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 8fc3d04e4248a..03f6a5a57a18a 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -3117,6 +3117,7 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0
; GFX1164-NEXT: v_readfirstlane_b32 s2, v0
; GFX1164-NEXT: v_readfirstlane_b32 s4, v1
+; GFX1164-NEXT: s_waitcnt_depctr 0xfff
; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
new file mode 100644
index 0000000000000..5aff87f5cb213
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -0,0 +1,399 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: partial_forwarding_1_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_1_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_2_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_2_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $sgpr0 = S_MOV_B32 0
+ ; GCN: $sgpr1 = S_MOV_B32 0
+ ; GCN: $sgpr2 = S_MOV_B32 0
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $sgpr3 = S_MOV_B32 0
+ ; GCN: $sgpr4 = S_MOV_B32 0
+ ; GCN: $sgpr5 = S_MOV_B32 0
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $sgpr6 = S_MOV_B32 0
+ ; GCN: $sgpr7 = S_MOV_B32 0
+ ; GCN: $sgpr8 = S_MOV_B32 0
+ ; GCN: $sgpr9 = S_MOV_B32 0
+ ; GCN: $sgpr10 = S_MOV_B32 0
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 0
+ $sgpr2 = S_MOV_B32 0
+ $exec = S_MOV_B64 -1
+ $sgpr3 = S_MOV_B32 0
+ $sgpr4 = S_MOV_B32 0
+ $sgpr5 = S_MOV_B32 0
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr6 = S_MOV_B32 0
+ $sgpr7 = S_MOV_B32 0
+ $sgpr8 = S_MOV_B32 0
+ $sgpr9 = S_MOV_B32 0
+ $sgpr10 = S_MOV_B32 0
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_3_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_3_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_3_no_hazard_1
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_3_no_hazard_1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_3_no_hazard_2
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_3_no_hazard_2
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_3_no_hazard_3
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_3_no_hazard_3
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr20 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_4_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_4_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_4_no_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_4_no_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_5_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_5_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_5_no_hazard
+body: |
+ bb.0:
+ ; GCN-LABEL: name: partial_forwarding_5_no_hazard
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr21 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_branching_1a
+body: |
+ ; GCN-LABEL: name: partial_forwarding_branching_1a
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.2:
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ S_BRANCH %bb.2
+ bb.1:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.2
+ bb.2:
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: partial_forwarding_branching_1b
+body: |
+ ; GCN-LABEL: name: partial_forwarding_branching_1b
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $exec = S_MOV_B64 -1
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.2:
+ ; GCN: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: S_WAITCNT_DEPCTR 4095
+ ; GCN: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.2
+ bb.1:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $exec = S_MOV_B64 -1
+ S_BRANCH %bb.2
+ bb.2:
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir
new file mode 100644
index 0000000000000..26f2b126d0f94
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir
@@ -0,0 +1,334 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: trans_use_1_hazard # V_SQRT_F32 writes $vgpr1, one V_MOV_B32 follows, then V_ADD_F32 reads $vgpr1
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_1_hazard
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect an S_WAITCNT_DEPCTR 4095 immediately before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_1_no_hazard_1 # same sequence as trans_use_1_hazard, but the input already has S_WAITCNT_DEPCTR 4095 right after the V_SQRT_F32
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_1_no_hazard_1
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ S_WAITCNT_DEPCTR 4095
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect the output to match the input, with no second wait added here
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_2_hazard # eleven S_MOV_B32 and one V_MOV_B32 sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_2_hazard
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 0
+ $sgpr2 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 0
+ $sgpr4 = S_MOV_B32 0
+ $sgpr5 = S_MOV_B32 0
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr6 = S_MOV_B32 0
+ $sgpr7 = S_MOV_B32 0
+ $sgpr8 = S_MOV_B32 0
+ $sgpr9 = S_MOV_B32 0
+ $sgpr10 = S_MOV_B32 0
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above still expect an S_WAITCNT_DEPCTR 4095 immediately before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_3_hazard # five V_MOV_B32 sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_3_hazard
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect an S_WAITCNT_DEPCTR 4095 immediately before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_3_no_hazard_1 # six V_MOV_B32 (one more than in trans_use_3_hazard) sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_3_no_hazard_1
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect no S_WAITCNT_DEPCTR to be inserted before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_3_no_hazard_2 # seven V_MOV_B32 sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it; the $vgpr2 write is the third of them
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_3_no_hazard_2
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect no S_WAITCNT_DEPCTR to be inserted before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_3_no_hazard_3 # two further V_SQRT_F32 (writing $vgpr10 and $vgpr12) sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_3_no_hazard_3
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_SQRT_F32_e32 $vgpr13, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+ $vgpr12 = V_SQRT_F32_e32 $vgpr13, implicit $mode, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect no S_WAITCNT_DEPCTR to be inserted before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_4_one_depctr_1 # two V_SQRT_F32 write $vgpr1 and $vgpr3; two V_ADD_F32 then read them in the same order ($vgpr1 first, $vgpr3 second)
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_4_one_depctr_1
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr4, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr6, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr4, implicit $mode, implicit $exec ; the checks above expect a single S_WAITCNT_DEPCTR 4095, placed immediately before this instruction
+ $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr6, implicit $mode, implicit $exec ; the checks above expect no second wait before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_4_one_depctr_2 # two V_SQRT_F32 write $vgpr1 and $vgpr3; two V_ADD_F32 then read them in the opposite order ($vgpr3 first, $vgpr1 second)
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_4_one_depctr_2
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr1, $vgpr6, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec ; the checks above expect a single S_WAITCNT_DEPCTR 4095, placed immediately before this instruction
+ $vgpr7 = V_ADD_F32_e32 $vgpr1, $vgpr6, implicit $mode, implicit $exec ; the checks above expect no second wait before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_4 # one further V_SQRT_F32 (writing $vgpr10) and one V_MOV_B32 sit between the V_SQRT_F32 that writes $vgpr1 and the V_ADD_F32 that reads it
+body: |
+ bb.0:
+ ; GCN-LABEL: name: trans_use_4
+ ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect an S_WAITCNT_DEPCTR 4095 immediately before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_branching_1a # bb.0 ends with V_SQRT_F32 writing $vgpr1 plus a branch; bb.1 holds five V_MOV_B32 plus a branch; both branch to bb.2, which reads $vgpr1 after one V_MOV_B32
+body: |
+ ; GCN-LABEL: name: trans_use_branching_1a
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr33 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ S_BRANCH %bb.2
+ bb.1:
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr32 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr33 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.2
+ bb.2:
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect an S_WAITCNT_DEPCTR 4095 immediately before this instruction
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_branching_1b # like trans_use_branching_1a, but bb.1 has two V_MOV_B32 followed by an S_WAITCNT_DEPCTR 4095 that is already present in the input
+body: |
+ ; GCN-LABEL: name: trans_use_branching_1b
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ S_BRANCH %bb.2
+ bb.1:
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ S_WAITCNT_DEPCTR 4095
+ S_BRANCH %bb.2
+ bb.2:
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above still expect a new S_WAITCNT_DEPCTR 4095 immediately before this instruction, in addition to the one kept in bb.1
+ S_ENDPGM 0
+...
+
+---
+name: trans_use_branching_1c_no_hazard_1 # like trans_use_branching_1b, but the pre-existing S_WAITCNT_DEPCTR 4095 is in bb.0, right after the V_SQRT_F32, instead of in bb.1
+body: |
+ ; GCN-LABEL: name: trans_use_branching_1c_no_hazard_1
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec
+ S_WAITCNT_DEPCTR 4095
+ S_BRANCH %bb.2
+ bb.1:
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.2
+ bb.2:
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; the checks above expect no S_WAITCNT_DEPCTR to be inserted before this instruction
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list