[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)
Ana Mihajlovic via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 09:31:13 PST 2025
https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/128916
>From 2c77fd3998321dde8b0a221fc4ff544b014916dc Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:20:55 +0100
Subject: [PATCH 1/2] merge consecutive wait_alu instructions
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 22 ++++++++++++++
.../AMDGPU/merge-consecutive-wait-alus.mir | 30 +++++++++++++++++++
2 files changed, 52 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 4df55eac5d76b..bb15d12ada650 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards {
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
}
+ unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+ unsigned Mask = Mask1 & Mask2;
+
+ Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ return Mask;
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
+ if (MI != MI->getParent()->begin()) {
+ MachineInstr &PrevMI = *std::prev(MI);
+ if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+ Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
+ PrevMI.eraseFromParent();
+ }
+ }
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
new file mode 100644
index 0000000000000..0cd203e6a9bbb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
+
+
+---
+name: merge_consecutive_wait_alus
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+
+
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
>From 2eb2045b009a3332ea68931022e07ccd8f484828 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:30:41 +0100
Subject: [PATCH 2/2] update test
---
llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir | 6 ------
1 file changed, 6 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index 0cd203e6a9bbb..ff3c60d98da11 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -4,12 +4,6 @@
---
name: merge_consecutive_wait_alus
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
More information about the llvm-commits
mailing list