[llvm] 13107c2 - [AMDGPU] Add support for GFX11 LDSDIR hazards
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 20 15:25:47 PDT 2022
Author: Jay Foad
Date: 2022-06-20T21:58:12+01:00
New Revision: 13107c2770dfdbb95ad07fa9235116fbf26e38f0
URL: https://github.com/llvm/llvm-project/commit/13107c2770dfdbb95ad07fa9235116fbf26e38f0
DIFF: https://github.com/llvm/llvm-project/commit/13107c2770dfdbb95ad07fa9235116fbf26e38f0.diff
LOG: [AMDGPU] Add support for GFX11 LDSDIR hazards
Detect LDS direct WAR/WAW hazards and compute values for the
wait_vdst (va_vdst) parameter. Where appropriate this
raises wait_vdst from the default 0 to allow concurrent
issue of LDS direct with VALU execution.
Also detect LDS direct versus VMEM source VGPR hazards
and insert vm_vsrc=0 waits using s_waitcnt_depctr.
Differential Revision: https://reviews.llvm.org/D127963
Added:
llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8adae23a43d5f..f16bbb99c4578 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -427,6 +427,7 @@ void GCNHazardRecognizer::RecedeCycle() {
typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
+typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
// Search for a hazard in a block and its predecessors.
template <typename StateT>
@@ -473,11 +474,11 @@ hasHazard(StateT State,
// Returns the minimum wait states since \p I, walking all predecessors.
// Only scans while \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
-static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
- const MachineBasicBlock *MBB,
- MachineBasicBlock::const_reverse_instr_iterator I,
- int WaitStates, IsExpiredFn IsExpired,
- DenseSet<const MachineBasicBlock *> &Visited) {
+static int getWaitStatesSince(
+ GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
+ MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
+ IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
+ GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
@@ -489,7 +490,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (I->isInlineAsm())
continue;
- WaitStates += SIInstrInfo::getNumWaitStates(*I);
+ WaitStates += GetNumWaitStates(*I);
if (IsExpired(*I, WaitStates))
return std::numeric_limits<int>::max();
@@ -500,8 +501,8 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (!Visited.insert(Pred).second)
continue;
- int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
- WaitStates, IsExpired, Visited);
+ int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
+ IsExpired, Visited, GetNumWaitStates);
MinWaitStates = std::min(MinWaitStates, W);
}
@@ -1075,6 +1076,10 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixSMEMtoVectorWriteHazards(MI);
fixVcmpxExecWARHazard(MI);
fixLdsBranchVmemWARHazard(MI);
+ if (ST.hasLdsDirect()) {
+ fixLdsDirectVALUHazard(MI);
+ fixLdsDirectVMEMHazard(MI);
+ }
fixVALUPartialForwardingHazard(MI);
fixVALUTransUseHazard(MI);
}
@@ -1366,6 +1371,81 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
return true;
}
+bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
+ if (!SIInstrInfo::isLDSDIR(*MI))
+ return false;
+
+ const int NoHazardWaitStates = 15;
+ const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
+ const Register VDSTReg = VDST->getReg();
+
+ bool VisitedTrans = false;
+ auto IsHazardFn = [this, VDSTReg, &VisitedTrans](const MachineInstr &I) {
+ if (!SIInstrInfo::isVALU(I))
+ return false;
+ VisitedTrans = VisitedTrans || SIInstrInfo::isTRANS(I);
+ // Cover both WAR and WAW
+ return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
+ };
+ auto IsExpiredFn = [&](const MachineInstr &I, int WaitStates) {
+ if (WaitStates >= NoHazardWaitStates)
+ return true;
+ // Instructions which cause va_vdst==0 expire hazard
+ return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
+ SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
+ };
+ auto GetWaitStatesFn = [](const MachineInstr &MI) {
+ return SIInstrInfo::isVALU(MI) ? 1 : 0;
+ };
+
+ DenseSet<const MachineBasicBlock *> Visited;
+ auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
+ std::next(MI->getReverseIterator()), 0,
+ IsExpiredFn, Visited, GetWaitStatesFn);
+
+ // Transcendentals can execute in parallel to other VALUs.
+ // This makes va_vdst count unusable with a mixture of VALU and TRANS.
+ if (VisitedTrans)
+ Count = 0;
+
+ MachineOperand *WaitVdstOp =
+ TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst);
+ WaitVdstOp->setImm(std::min(Count, NoHazardWaitStates));
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
+ if (!SIInstrInfo::isLDSDIR(*MI))
+ return false;
+
+ const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
+ const Register VDSTReg = VDST->getReg();
+
+ auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
+ if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
+ !SIInstrInfo::isDS(I))
+ return false;
+ return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
+ };
+ auto IsExpiredFn = [](const MachineInstr &I, int) {
+ return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
+ (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ I.getOperand(0).getImm() == 0xffe3);
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xffe3);
+
+ return true;
+}
+
bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
if (!ST.isWave64())
return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 1e12e0820f2ef..116e421e91722 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -96,6 +96,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
bool fixVcmpxExecWARHazard(MachineInstr *MI);
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+ bool fixLdsDirectVALUHazard(MachineInstr *MI);
+ bool fixLdsDirectVMEMHazard(MachineInstr *MI);
bool fixVALUPartialForwardingHazard(MachineInstr *MI);
bool fixVALUTransUseHazard(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir
new file mode 100644
index 0000000000000..25311ef3f8b63
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir
@@ -0,0 +1,409 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: lds_param_load_no_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_no_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst0_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst0_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst0_war_salu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst0_war_salu
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $m0 = S_MOV_B32 killed $sgpr0
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $m0 = S_MOV_B32 killed $sgpr0
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst1_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst1_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst10_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst10_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 10, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst10_waw
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst10_waw
+ ; GCN: $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 10, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst20_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst20_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_trans
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_trans
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_trans_war_valu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_trans_war_valu
+ ; GCN: $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_vmem
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_vmem
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_lds
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_lds
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_ldsdir
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_ldsdir
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = LDS_PARAM_LOAD 0, 1, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 4, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = LDS_PARAM_LOAD 0, 1, 15, implicit $m0, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 4, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_valu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_valu
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_exp
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_exp
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_waitcnt
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT 0
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ S_WAITCNT 0
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_waitcnt_depctr
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ S_WAITCNT_DEPCTR 65507
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_waitcnt_depctr2
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65535
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ S_WAITCNT_DEPCTR 65535
+ $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_direct_load_no_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_direct_load_no_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_DIRECT_LOAD 15, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = LDS_DIRECT_LOAD 0, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_direct_load_va_vdst0_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_direct_load_va_vdst0_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = LDS_DIRECT_LOAD 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = LDS_DIRECT_LOAD 15, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list