[llvm] [AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand in GCNHazardRecognizer (PR #77628)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 10 09:26:27 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/77628
Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so the waitvsrc operand
in gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set appropriately
depending on whether a hazard is found or not, rather than inserting an
S_WAITCNT_DEPCTR instruction if a hazard needs to be mitigated.
From 37d5d3e40cd60401988c9c3f67fc9ed75d21714b Mon Sep 17 00:00:00 2001
From: Stephen Thomas <Stephen.Thomas at amd.com>
Date: Tue, 17 Jan 2023 13:26:29 +0000
Subject: [PATCH] [AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand
in GCNHazardRecognizer
Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so the waitvsrc operand
in gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set appropriately
depending on whether a hazard is found or not, rather than inserting an
S_WAITCNT_DEPCTR instruction if a hazard needs to be mitigated.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 +
...zards.mir => lds-direct-hazards-gfx11.mir} | 0
.../AMDGPU/lds-direct-hazards-gfx12.mir | 391 ++++++++++++++++++
4 files changed, 405 insertions(+), 5 deletions(-)
rename llvm/test/CodeGen/AMDGPU/{lds-direct-hazards.mir => lds-direct-hazards-gfx11.mir} (100%)
create mode 100644 llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a7d8ff0242b801..bcd93e30d6c2d1 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
- auto IsExpiredFn = [](const MachineInstr &I, int) {
+ bool LdsdirCanWait = ST.hasLdsWaitVMSRC();
+ auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
+ AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) ||
+ (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) &&
+ !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm());
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
std::numeric_limits<int>::max())
return false;
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ if (LdsdirCanWait) {
+ TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0);
+ } else {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ }
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6f37f5170a403..85d062a9a6f5e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1128,6 +1128,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasLdsDirect() const { return getGeneration() >= GFX11; }
+ bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
+
bool hasVALUPartialForwardingHazard() const {
return getGeneration() >= GFX11;
}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir
rename to llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir
diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir
new file mode 100644
index 00000000000000..1ef6ce88e61106
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir
@@ -0,0 +1,391 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: lds_param_load_no_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_no_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst0_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst0_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst0_war_salu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst0_war_salu
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $m0 = S_MOV_B32 killed $sgpr0
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $m0 = S_MOV_B32 killed $sgpr0
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst1_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst1_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 1, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst10_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst10_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst10_waw
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst10_waw
+ ; GCN: $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_va_vdst20_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_va_vdst20_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_trans
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_trans
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_trans_war_valu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_trans_war_valu
+ ; GCN: $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_vmem
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_vmem
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_lds
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_lds
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_valu_war_ldsdir
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_valu_war_ldsdir
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_valu
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_valu
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_exp
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_exp
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_waitcnt_depctr
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ S_WAITCNT_DEPCTR 65507
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_param_load_vmem_war_waitcnt_depctr2
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2
+ ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65535
+ ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ S_WAITCNT_DEPCTR 65535
+ $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_direct_load_no_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_direct_load_no_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
+ $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: lds_direct_load_va_vdst0_war
+body: |
+ bb.0:
+ ; GCN-LABEL: name: lds_direct_load_va_vdst0_war
+ ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list.