[llvm-branch-commits] [llvm] AMDGPU: Handle vcmpx+permlane gfx950 hazard (PR #117286)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 21 20:48:11 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Confusingly, this is a different hazard from the vcmpx+permlane hazard on
gfx10, which is controlled by a subtarget feature.
---
Full diff: https://github.com/llvm/llvm-project/pull/117286.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+31-4)
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1)
- (added) llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir (+144)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8008b5f7bcc991..45ff1f4a63cf03 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANE64_B32 ||
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
- Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
+ Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
+ Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
+ Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
}
static bool isLdsDma(const MachineInstr &MI) {
@@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));
+ if (ST.hasGFX950Insts() && isPermlane(*MI))
+ return std::max(WaitStates, checkPermlaneHazards(MI));
+
return WaitStates;
}
@@ -1200,6 +1207,14 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixRequiredExportPriority(MI);
}
+static bool isVCmpXWritesExec(const SIInstrInfo &TII,
+ const SIRegisterInfo &TRI,
+ const MachineInstr &MI) {
+ return (TII.isVOPC(MI) ||
+ (MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) &&
+ MI.modifiesRegister(AMDGPU::EXEC, &TRI);
+}
+
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
return false;
@@ -1207,9 +1222,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
- return (TII->isVOPC(MI) ||
- ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
- MI.modifiesRegister(AMDGPU::EXEC, TRI);
+ return isVCmpXWritesExec(*TII, *TRI, MI);
};
auto IsExpiredFn = [](const MachineInstr &MI, int) {
@@ -2529,6 +2542,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}
+int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
+ assert(!ST.hasVcmpxPermlaneHazard() &&
+ "this is a different vcmpx+permlane hazard");
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) {
+ return isVCmpXWritesExec(*TII, *TRI, MI);
+ };
+
+ const int NumWaitStates = 4;
+ return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates);
+}
+
static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index adb2278c48eebe..83ce100c58f0a6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkMFMAPadding(MachineInstr *MI);
int checkMAIVALUHazards(MachineInstr *MI);
int checkMAILdStHazards(MachineInstr *MI);
+ int checkPermlaneHazards(MachineInstr *MI);
public:
GCNHazardRecognizer(const MachineFunction &MF);
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
new file mode 100644
index 00000000000000..97bef7be711ff2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane16_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane16_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane32_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane32_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane32_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane32_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/117286
More information about the llvm-branch-commits
mailing list