[llvm] 41877b8 - AMDGPU: Fix dbg_value handling when forming soft clause bundles

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 1 19:16:44 PST 2021


Author: Matt Arsenault
Date: 2021-02-01T22:16:35-05:00
New Revision: 41877b82f07224041a2a994f9032332fe01e4d1b

URL: https://github.com/llvm/llvm-project/commit/41877b82f07224041a2a994f9032332fe01e4d1b
DIFF: https://github.com/llvm/llvm-project/commit/41877b82f07224041a2a994f9032332fe01e4d1b.diff

LOG: AMDGPU: Fix dbg_value handling when forming soft clause bundles

DBG_VALUES placed between memory instructions would change
codegen. Skip over these and re-insert them after the bundle instead
of giving up on bundling.

Added: 
    llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir

Modified: 
    llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
    llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index aeec3e886327..55fd2b5eb284 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -384,6 +384,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
 
 void GCNDownwardRPTracker::advanceToNext() {
   LastTrackedMI = &*NextMI++;
+  NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
 
   // Add new registers or mask bits.
   for (const auto &MO : LastTrackedMI->operands()) {

diff  --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index c2ed148c0cf1..59ce080c8e70 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -107,7 +107,8 @@ static bool isSMEMClauseInst(const MachineInstr &MI) {
 // There no sense to create store clauses, they do not define anything,
 // thus there is nothing to set early-clobber.
 static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
-  if (MI.isDebugValue() || MI.isBundled())
+  assert(!MI.isDebugInstr() && "debug instructions should not reach here");
+  if (MI.isBundled())
     return false;
   if (!MI.mayLoad() || MI.mayStore())
     return false;
@@ -322,6 +323,8 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
   unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
       MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
 
+  SmallVector<MachineInstr *> DbgInstrs;
+
   for (MachineBasicBlock &MBB : MF) {
     GCNDownwardRPTracker RPT(*LIS);
     MachineBasicBlock::instr_iterator Next;
@@ -329,6 +332,9 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr &MI = *I;
       Next = std::next(I);
 
+      if (MI.isDebugInstr())
+        continue;
+
       bool IsVMEM = isVMEMClauseInst(MI);
 
       if (!isValidClauseInst(MI, IsVMEM))
@@ -350,6 +356,11 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
 
       unsigned Length = 1;
       for ( ; Next != E && Length < FuncMaxClause; ++Next) {
+        // Debug instructions should not change the bundling. We need to move
+        // these after the bundle
+        if (Next->isDebugInstr())
+          continue;
+
         if (!isValidClauseInst(*Next, IsVMEM))
           break;
 
@@ -374,8 +385,17 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
 
       // Restore the state after processing the bundle.
       RPT.reset(*B, &LiveRegsCopy);
+      DbgInstrs.clear();
+
+      auto BundleNext = I;
+      for (auto BI = I; BI != Next; BI = BundleNext) {
+        BundleNext = std::next(BI);
+
+        if (BI->isDebugValue()) {
+          DbgInstrs.push_back(BI->removeFromParent());
+          continue;
+        }
 
-      for (auto BI = I; BI != Next; ++BI) {
         BI->bundleWithPred();
         Ind->removeSingleMachineInstrFromMaps(*BI);
 
@@ -384,6 +404,10 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
             MO.setIsInternalRead(true);
       }
 
+      // Replace any debug instructions after the new bundle.
+      for (MachineInstr *DbgInst : DbgInstrs)
+        MBB.insert(Next, DbgInst);
+
       for (auto &&R : Defs) {
         forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
           unsigned S = R.second.first | RegState::EarlyClobber;

diff  --git a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
new file mode 100644
index 000000000000..6d4883729ece
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
+
+# Make sure that debug instructions do not change the bundling, and
+# the dbg_values which break the clause are inserted after the new
+# bundles.
+
+---
+name: sgpr_clause_dbg_value
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+    ; CHECK-LABEL: name: sgpr_clause_dbg_value
+    ; CHECK: liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+    ; CHECK: early-clobber %2:sreg_32_xm0_xexec, early-clobber %1:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+    ; CHECK:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK:   [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: }
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM]], 0, 0
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM1]], 0, 0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_NOP 0
+    ; CHECK: early-clobber %4:sreg_32_xm0_xexec, early-clobber %3:sreg_32_xm0_xexec, early-clobber %5:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+    ; CHECK:   [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK:   [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK:   [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: }
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM2]], 0, 0
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM3]], 0, 0
+    ; CHECK: S_ENDPGM 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_LOAD_DWORD_IMM1]], implicit [[S_LOAD_DWORD_IMM2]], implicit [[S_LOAD_DWORD_IMM3]], implicit [[S_LOAD_DWORD_IMM4]]
+    %0:sreg_64 = COPY $sgpr4_sgpr5
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %1, 0, 0
+    %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %2, 0, 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (load 4, align 4, addrspace 4)
+    %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %3, 0, 0
+    DBG_VALUE %4, 0, 0
+    %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (load 4, align 4, addrspace 4)
+    S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
+
+...


        


More information about the llvm-commits mailing list