[llvm] 41877b8 - AMDGPU: Fix dbg_value handling when forming soft clause bundles
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 1 19:16:44 PST 2021
Author: Matt Arsenault
Date: 2021-02-01T22:16:35-05:00
New Revision: 41877b82f07224041a2a994f9032332fe01e4d1b
URL: https://github.com/llvm/llvm-project/commit/41877b82f07224041a2a994f9032332fe01e4d1b
DIFF: https://github.com/llvm/llvm-project/commit/41877b82f07224041a2a994f9032332fe01e4d1b.diff
LOG: AMDGPU: Fix dbg_value handling when forming soft clause bundles
DBG_VALUES placed between memory instructions would change
codegen. Skip over these and re-insert them after the bundle instead
of giving up on bundling.
Added:
llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
Modified:
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index aeec3e886327..55fd2b5eb284 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -384,6 +384,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
void GCNDownwardRPTracker::advanceToNext() {
LastTrackedMI = &*NextMI++;
+ NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
// Add new registers or mask bits.
for (const auto &MO : LastTrackedMI->operands()) {
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index c2ed148c0cf1..59ce080c8e70 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -107,7 +107,8 @@ static bool isSMEMClauseInst(const MachineInstr &MI) {
// There is no sense in creating store clauses: they do not define anything,
// thus there is nothing to set early-clobber on.
static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
- if (MI.isDebugValue() || MI.isBundled())
+ assert(!MI.isDebugInstr() && "debug instructions should not reach here");
+ if (MI.isBundled())
return false;
if (!MI.mayLoad() || MI.mayStore())
return false;
@@ -322,6 +323,8 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
+ SmallVector<MachineInstr *> DbgInstrs;
+
for (MachineBasicBlock &MBB : MF) {
GCNDownwardRPTracker RPT(*LIS);
MachineBasicBlock::instr_iterator Next;
@@ -329,6 +332,9 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
Next = std::next(I);
+ if (MI.isDebugInstr())
+ continue;
+
bool IsVMEM = isVMEMClauseInst(MI);
if (!isValidClauseInst(MI, IsVMEM))
@@ -350,6 +356,11 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
unsigned Length = 1;
for ( ; Next != E && Length < FuncMaxClause; ++Next) {
+ // Debug instructions should not change the bundling. We need to move
+ // these after the bundle.
+ if (Next->isDebugInstr())
+ continue;
+
if (!isValidClauseInst(*Next, IsVMEM))
break;
@@ -374,8 +385,17 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
// Restore the state after processing the bundle.
RPT.reset(*B, &LiveRegsCopy);
+ DbgInstrs.clear();
+
+ auto BundleNext = I;
+ for (auto BI = I; BI != Next; BI = BundleNext) {
+ BundleNext = std::next(BI);
+
+ if (BI->isDebugValue()) {
+ DbgInstrs.push_back(BI->removeFromParent());
+ continue;
+ }
- for (auto BI = I; BI != Next; ++BI) {
BI->bundleWithPred();
Ind->removeSingleMachineInstrFromMaps(*BI);
@@ -384,6 +404,10 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
MO.setIsInternalRead(true);
}
+ // Re-insert any removed debug instructions after the new bundle.
+ for (MachineInstr *DbgInst : DbgInstrs)
+ MBB.insert(Next, DbgInst);
+
for (auto &&R : Defs) {
forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
unsigned S = R.second.first | RegState::EarlyClobber;
diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
new file mode 100644
index 000000000000..6d4883729ece
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
+
+# Make sure that debug instructions do not change the bundling, and
+# the dbg_values which break the clause are inserted after the new
+# bundles.
+
+---
+name: sgpr_clause_dbg_value
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+ ; CHECK-LABEL: name: sgpr_clause_dbg_value
+ ; CHECK: liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+ ; CHECK: early-clobber %2:sreg_32_xm0_xexec, early-clobber %1:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+ ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: }
+ ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM]], 0, 0
+ ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM1]], 0, 0
+ ; CHECK: S_NOP 0
+ ; CHECK: S_NOP 0
+ ; CHECK: S_NOP 0
+ ; CHECK: early-clobber %4:sreg_32_xm0_xexec, early-clobber %3:sreg_32_xm0_xexec, early-clobber %5:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+ ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0, 0 :: (load 4, addrspace 4)
+ ; CHECK: }
+ ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM2]], 0, 0
+ ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM3]], 0, 0
+ ; CHECK: S_ENDPGM 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_LOAD_DWORD_IMM1]], implicit [[S_LOAD_DWORD_IMM2]], implicit [[S_LOAD_DWORD_IMM3]], implicit [[S_LOAD_DWORD_IMM4]]
+ %0:sreg_64 = COPY $sgpr4_sgpr5
+ %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4)
+ DBG_VALUE %1, 0, 0
+ %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4)
+ DBG_VALUE %2, 0, 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (load 4, align 4, addrspace 4)
+ %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0, 0 :: (load 4, align 4, addrspace 4)
+ DBG_VALUE %3, 0, 0
+ DBG_VALUE %4, 0, 0
+ %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (load 4, align 4, addrspace 4)
+ S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
+
+...
More information about the llvm-commits mailing list