[llvm] [AMDGPU] Merge consecutive wait_alu instruction (PR #128916)
Ana Mihajlovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 02:43:13 PDT 2025
https://github.com/mihajlovicana updated https://github.com/llvm/llvm-project/pull/128916
>From 5752242a14579fcc7973226e6da712548f8dddc2 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Wed, 26 Feb 2025 18:20:55 +0100
Subject: [PATCH 1/4] merge consecutive wait_alu instructions
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 22 ++++++++++++++
.../AMDGPU/merge-consecutive-wait-alus.mir | 30 +++++++++++++++++++
2 files changed, 52 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 4df55eac5d76b..bb15d12ada650 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -164,6 +164,21 @@ class AMDGPUWaitSGPRHazards {
BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
}
+ unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
+ unsigned Mask = Mask1 & Mask2;
+
+ Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ return Mask;
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -362,6 +377,13 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
+ if (MI != MI->getParent()->begin()) {
+ MachineInstr &PrevMI = *std::prev(MI);
+ if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+ Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
+ PrevMI.eraseFromParent();
+ }
+ }
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
new file mode 100644
index 0000000000000..0cd203e6a9bbb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass amdgpu-wait-sgpr-hazards -o - %s | FileCheck %s
+
+
+---
+name: merge_consecutive_wait_alus
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+
+
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
>From 8d02e56fb732c151b9e136377e2fe2c6c42ad2e3 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Thu, 6 Mar 2025 15:23:34 +0100
Subject: [PATCH 2/4] skip debug instructions
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 29 +++++++--
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 36 +++++++++++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 18 ++++++
.../AMDGPU/merge-consecutive-wait-alus.mir | 64 ++++++++++++++++---
4 files changed, 132 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index bb15d12ada650..43109be2c23f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -165,8 +165,13 @@ class AMDGPUWaitSGPRHazards {
}
unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
- unsigned Mask = Mask1 & Mask2;
-
+ unsigned Mask = 0xffff;
+ Mask = AMDGPU::DepCtr::encodeFieldSaSdst(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
+ AMDGPU::DepCtr::decodeFieldSaSdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaVcc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaVcc(Mask2)));
Mask = AMDGPU::DepCtr::encodeFieldVmVsrc(
Mask, std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2)));
@@ -176,6 +181,12 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaVdst(
Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
AMDGPU::DepCtr::decodeFieldVaVdst(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldHoldCnt(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
+ AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2)));
+ Mask = AMDGPU::DepCtr::encodeFieldVaSsrc(
+ Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
+ AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2)));
return Mask;
}
@@ -377,13 +388,17 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
- if (MI != MI->getParent()->begin()) {
- MachineInstr &PrevMI = *std::prev(MI);
- if (PrevMI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
- Mask = mergeMasks(Mask, PrevMI.getOperand(0).getImm());
- PrevMI.eraseFromParent();
+ if (MI != MBB.instr_begin()) {
+ MachineBasicBlock::instr_iterator It = std::prev(MI);
+ while (It != MBB.instr_begin() && It->isDebugInstr())
+ --It;
+ if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+ Mask = mergeMasks(Mask, It->getOperand(0).getImm());
+ It->getOperand(0).setImm(Mask);
+ continue;
}
}
+
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b51cf536467b9..b166a8c206054 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -164,6 +164,18 @@ inline unsigned getSaSdstBitWidth() { return 1; }
/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }
+/// \returns VaSsrc width
+inline unsigned getVaSsrcBitWidth() { return 1; }
+
+/// \returns VaSsrc bit shift
+inline unsigned getVaSsrcBitShift() { return 8; }
+
+/// \returns HoldCnt width
+inline unsigned getHoldCntWidth() { return 1; }
+
+/// \returns HoldCnt bit shift
+inline unsigned getHoldCntBitShift() { return 7; }
+
} // end anonymous namespace
namespace llvm {
@@ -1740,6 +1752,14 @@ unsigned decodeFieldVaVcc(unsigned Encoded) {
return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
}
+unsigned decodeFieldVaSsrc(unsigned Encoded) {
+ return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned decodeFieldHoldCnt(unsigned Encoded) {
+ return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}
@@ -1780,6 +1800,22 @@ unsigned encodeFieldVaVcc(unsigned VaVcc) {
return encodeFieldVaVcc(0xffff, VaVcc);
}
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
+ return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
+}
+
+unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
+ return encodeFieldVaSsrc(0xfff, VaSsrc);
+}
+
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
+ return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
+}
+
+unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
+ return encodeFieldHoldCnt(0xfff, HoldCnt);
+}
+
} // namespace DepCtr
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f54d5a273ca37..184f40bccfff8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1180,6 +1180,12 @@ unsigned decodeFieldVaSdst(unsigned Encoded);
/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);
+/// \returns Decoded VaSsrc from given immediate \p Encoded.
+unsigned decodeFieldVaSsrc(unsigned Encoded);
+
+/// \returns Decoded HoldCnt from given immediate \p Encoded.
+unsigned decodeFieldHoldCnt(unsigned Encoded);
+
/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);
@@ -1210,6 +1216,18 @@ unsigned encodeFieldVaVcc(unsigned VaVcc);
/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
+/// \returns \p HoldCnt as an encoded Depctr immediate.
+unsigned encodeFieldHoldCnt(unsigned HoldCnt);
+
+/// \returns \p Encoded combined with encoded \p HoldCnt.
+unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt);
+
+/// \returns \p VaSsrc as an encoded Depctr immediate.
+unsigned encodeFieldVaSsrc(unsigned VaSsrc);
+
+/// \returns \p Encoded combined with encoded \p VaSsrc.
+unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
+
} // namespace DepCtr
namespace Exp {
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index 0cd203e6a9bbb..13d0290dcac1d 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -4,12 +4,6 @@
---
name: merge_consecutive_wait_alus
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
@@ -24,7 +18,61 @@ body: |
S_WAITCNT_DEPCTR 65530
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
...
+---
+name: merge_consecutive_wait_alus_two_bb
+body: |
+ ; CHECK-LABEL: name: merge_consecutive_wait_alus_two_bb
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ bb.0:
+ liveins: $vgpr0
+
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ bb.1:
+ liveins: $sgpr0
-## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-# CHECK: {{.*}}
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: meta_instructions
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: meta_instructions
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61951
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ SCHED_BARRIER 0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
+---
+name: debug_instruction
+machineFunctionInfo:
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: debug_instruction
+ ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo
+ ; CHECK-NEXT: S_WAITCNT_DEPCTR 61946
+ ; CHECK-NEXT: DBG_VALUE $sgpr0
+ ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo
+ renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc
+ S_WAITCNT_DEPCTR 65530
+ DBG_VALUE $sgpr0
+ renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
+...
>From f093ec6f3dc35218c9669347f19edc9064348622 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 7 Mar 2025 11:21:17 +0100
Subject: [PATCH 3/4] removed skipping hazard state update
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 34 +++++++++++--------
1 file changed, 20 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 43109be2c23f6..527e6bf4e2211 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -190,6 +190,21 @@ class AMDGPUWaitSGPRHazards {
return Mask;
}
+ bool mergeSubsequentWaitAlus(MachineBasicBlock::instr_iterator &MI,
+ unsigned Mask) {
+ auto MBB = MI->getParent();
+ if (MI != MBB->instr_begin()) {
+ MachineBasicBlock::instr_iterator It = std::prev(MI);
+ while (It != MBB->instr_begin() && It->isDebugInstr())
+ --It;
+ if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+ It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
+ return true;
+ }
+ }
+ return false;
+ }
+
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };
@@ -388,21 +403,12 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
- if (MI != MBB.instr_begin()) {
- MachineBasicBlock::instr_iterator It = std::prev(MI);
- while (It != MBB.instr_begin() && It->isDebugInstr())
- --It;
- if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
- Mask = mergeMasks(Mask, It->getOperand(0).getImm());
- It->getOperand(0).setImm(Mask);
- continue;
- }
+ if (!mergeSubsequentWaitAlus(MI, Mask)) {
+ auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(Mask);
+ updateGetPCBundle(NewMI);
}
-
- auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(Mask);
- updateGetPCBundle(NewMI);
Emitted = true;
}
}
>From bba8686a76c83b36b865d5364a728c58873599dd Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Tue, 11 Mar 2025 10:42:24 +0100
Subject: [PATCH 4/4] style changes, small fix
---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 25 +++++++++----------
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 +--
.../AMDGPU/merge-consecutive-wait-alus.mir | 1 +
3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 527e6bf4e2211..bfdd8cf1bc2b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -190,19 +190,18 @@ class AMDGPUWaitSGPRHazards {
return Mask;
}
- bool mergeSubsequentWaitAlus(MachineBasicBlock::instr_iterator &MI,
- unsigned Mask) {
+ bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
+ unsigned Mask) {
auto MBB = MI->getParent();
- if (MI != MBB->instr_begin()) {
- MachineBasicBlock::instr_iterator It = std::prev(MI);
- while (It != MBB->instr_begin() && It->isDebugInstr())
- --It;
- if (It->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
- It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
- return true;
- }
- }
- return false;
+ if (MI == MBB->instr_begin())
+ return false;
+
+ auto It = prev_nodbg(MI, MBB->instr_begin());
+ if (It->getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
+ return false;
+
+ It->getOperand(0).setImm(mergeMasks(Mask, It->getOperand(0).getImm()));
+ return true;
}
bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
@@ -403,7 +402,7 @@ class AMDGPUWaitSGPRHazards {
Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
}
if (Emit) {
- if (!mergeSubsequentWaitAlus(MI, Mask)) {
+ if (!mergeConsecutiveWaitAlus(MI, Mask)) {
auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(Mask);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b166a8c206054..ac6b07bad3e35 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1805,7 +1805,7 @@ unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
}
unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
- return encodeFieldVaSsrc(0xfff, VaSsrc);
+ return encodeFieldVaSsrc(0xffff, VaSsrc);
}
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
@@ -1813,7 +1813,7 @@ unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
}
unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
- return encodeFieldHoldCnt(0xfff, HoldCnt);
+ return encodeFieldHoldCnt(0xffff, HoldCnt);
}
} // namespace DepCtr
diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
index 13d0290dcac1d..d8f4c9c8f14b5 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir
@@ -76,3 +76,4 @@ body: |
DBG_VALUE $sgpr0
renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc
...
+
More information about the llvm-commits
mailing list