[llvm-branch-commits] [llvm] AMDGPU: Handle vcmpx+permlane gfx950 hazard (PR #117286)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Nov 22 21:39:02 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117286

>From 333e4d806d51da1858eb428e58fbf58b2d554edf Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 18 Mar 2024 13:45:22 +0530
Subject: [PATCH] AMDGPU: Handle vcmpx+permlane gfx950 hazard

Confusingly, this is a different hazard from the existing gfx10
vcmpx+permlane hazard, which is controlled by a subtarget feature.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  34 ++++-
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
 llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir   | 144 ++++++++++++++++++
 3 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8008b5f7bcc991..97995560842090 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -168,7 +168,11 @@ static bool isPermlane(const MachineInstr &MI) {
          Opcode == AMDGPU::V_PERMLANE64_B32 ||
          Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
          Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
-         Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
+         Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64 ||
+         Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 ||
+         Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 ||
+         Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 ||
+         Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64;
 }
 
 static bool isLdsDma(const MachineInstr &MI) {
@@ -395,6 +399,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
       SIInstrInfo::isDS(*MI))
     return std::max(WaitStates, checkMAILdStHazards(MI));
 
+  if (ST.hasGFX950Insts() && isPermlane(*MI))
+    return std::max(WaitStates, checkPermlaneHazards(MI));
+
   return WaitStates;
 }
 
@@ -1200,6 +1207,13 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixRequiredExportPriority(MI);
 }
 
+static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
+                              const MachineInstr &MI) {
+  return (TII.isVOPC(MI) ||
+          (MI.isCompare() && (TII.isVOP3(MI) || TII.isSDWA(MI)))) &&
+         MI.modifiesRegister(AMDGPU::EXEC, &TRI);
+}
+
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
   if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
     return false;
@@ -1207,9 +1221,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   auto IsHazardFn = [TII, TRI](const MachineInstr &MI) {
-    return (TII->isVOPC(MI) ||
-            ((TII->isVOP3(MI) || TII->isSDWA(MI)) && MI.isCompare())) &&
-           MI.modifiesRegister(AMDGPU::EXEC, TRI);
+    return isVCmpXWritesExec(*TII, *TRI, MI);
   };
 
   auto IsExpiredFn = [](const MachineInstr &MI, int) {
@@ -2529,6 +2541,20 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
   return WaitStatesNeeded;
 }
 
+int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
+  assert(!ST.hasVcmpxPermlaneHazard() &&
+         "this is a different vcmpx+permlane hazard");
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  auto IsVCmpXWritesExecFn = [TII, TRI](const MachineInstr &MI) {
+    return isVCmpXWritesExec(*TII, *TRI, MI);
+  };
+
+  const int NumWaitStates = 4;
+  return NumWaitStates - getWaitStatesSince(IsVCmpXWritesExecFn, NumWaitStates);
+}
+
 static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
   // 2 pass -> 4
   // 4 pass -> 6
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index adb2278c48eebe..83ce100c58f0a6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -134,6 +134,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   int checkMFMAPadding(MachineInstr *MI);
   int checkMAIVALUHazards(MachineInstr *MI);
   int checkMAILdStHazards(MachineInstr *MI);
+  int checkPermlaneHazards(MachineInstr *MI);
 
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
new file mode 100644
index 00000000000000..97bef7be711ff2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx950.mir
@@ -0,0 +1,144 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane16_swap_vop1
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop1
+# GCN:      V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vop3_write_exec_permlane16_swap_vop1
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop3
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane16_swap_vop3
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane16_swap_vop3
+# GCN:      V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vop3_write_exec_permlane16_swap_vop3
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop1
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane32_swap_vop1
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop1
+# GCN:      V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vop3_write_exec_permlane32_swap_vop1
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane32_swap_vop3
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane32_swap_vop3
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vop3_write_exec_permlane32_swap_vop3
+# GCN:      V_CMPX_EQ_I32_e64
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vop3_write_exec_permlane32_swap_vop3
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $exec = V_CMPX_EQ_I32_e64 $vgpr0, $vgpr1, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE32_SWAP_B32_e64 killed $vgpr0, killed $vgpr1, -1, 1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane16_swap_vop1__nowait
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...
+
+---
+# GCN-LABEL: name: vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+# GCN:      V_CMPX_EQ_I32_e32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_PERMLANE
+name:            vcmpx_vopc_write_exec_permlane16_swap_vop1__wait1
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit-def $exec, implicit-def $vcc, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr0, renamable $vgpr1 = V_PERMLANE16_SWAP_B32_e32 killed $vgpr0, killed $vgpr1, implicit $exec
+...



More information about the llvm-branch-commits mailing list