[llvm] AMDGPU: Handle vcmpx+permlane gfx950 hazard (PR #117286)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 25 09:24:22 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117286
>From 5d1d9656abb688d2e2dc510fb1353bd0fb9891d8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 18 Mar 2024 13:45:22 +0530
Subject: [PATCH] AMDGPU: Handle vcmpx+permlane gfx950 hazard
Confusingly, this is a different hazard from the vcmpx+permlane hazard on
gfx10, which is controlled by a subtarget feature.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 34 ++++-
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir | 144 ++++++++++++++++++
3 files changed, 175 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8008b5f7bcc991..97995560842090 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANE64_B32 ||
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
- Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
+ Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
+ Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
+ Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
+ Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
}
static bool isLdsDma(const MachineInstr &MI) {
@@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));
+ if (ST.hasGFX950Insts() && isPermlane(*MI))
+ return std::max(WaitStates, checkPermlaneHazards(MI));
+
return WaitStates;
}
@@ -1200,6 +1207,13 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixRequiredExportPriority(MI);
}
+static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
+ const MachineInstr &MI) {
+ return (TII.isVOPC(MI) ||
+ (MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) &&
+ MI.modifiesRegister(AMDGPU::EXEC, &TRI);
+}
+
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
return false;
@@ -1207,9 +1221,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
- return (TII->isVOPC(MI) ||
- ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
- MI.modifiesRegister(AMDGPU::EXEC, TRI);
+ return isVCmpXWritesExec(*TII, *TRI, MI);
};
auto IsExpiredFn = [](const MachineInstr &MI, int) {
@@ -2529,6 +2541,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}
+int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
+ assert(!ST.hasVcmpxPermlaneHazard() &&
+ "this is a different vcmpx+permlane hazard");
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) {
+ return isVCmpXWritesExec(*TII, *TRI, MI);
+ };
+
+ const int NumWaitStates = 4;
+ return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates);
+}
+
static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index adb2278c48eebe..83ce100c58f0a6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkMFMAPadding(MachineInstr *MI);
int checkMAIVALUHazards(MachineInstr *MI);
int checkMAILdStHazards(MachineInstr *MI);
+ int checkPermlaneHazards(MachineInstr *MI);
public:
GCNHazardRecognizer(const MachineFunction &MF);
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
new file mode 100644
index 00000000000000..97bef7be711ff2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane16_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane16_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane32_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane32_swap_vop1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane32_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
+# GCN: V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vop3_write_exec_permlane32_swap_vop3
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr5 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+# GCN: V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_PERMLANE
+name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+ renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
More information about the llvm-commits
mailing list