[llvm] c320e81 - AMDGPU: Fix debug info handling in post-RA bundler

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 16 07:42:13 PST 2021


Author: Matt Arsenault
Date: 2021-02-16T10:42:06-05:00
New Revision: c320e8196ae67a4eaa72253ed9013dda28eaf390

URL: https://github.com/llvm/llvm-project/commit/c320e8196ae67a4eaa72253ed9013dda28eaf390
DIFF: https://github.com/llvm/llvm-project/commit/c320e8196ae67a4eaa72253ed9013dda28eaf390.diff

LOG: AMDGPU: Fix debug info handling in post-RA bundler

This was allowing debug instructions to break the bundling, which
would change scheduling behavior. Bundle debug info / kills inside
the bundle. This seems to work OK, although the asm printer doesn't
understand these in a bundle. This implicitly expects the memory
legalizer to unbundle. It would probably be slightly nicer to move
these after.

Rewrite the loop to be clearer and make sure we don't end a bundle on
a meta instruction, only allow them in between other valid bundle
instructions.

Added: 
    llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
    llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index ab05081e55d5..2d220a0b71ac 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -48,10 +48,15 @@ class SIPostRABundler : public MachineFunctionPass {
 
   SmallSet<Register, 16> Defs;
 
+  bool isBundleCandidate(const MachineInstr &MI) const;
   bool isDependentLoad(const MachineInstr &MI) const;
-
+  bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
 };
 
+constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
+                              SIInstrFlags::SMRD | SIInstrFlags::DS |
+                              SIInstrFlags::FLAT | SIInstrFlags::MIMG;
+
 } // End anonymous namespace.
 
 INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)
@@ -80,55 +85,73 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
   return false;
 }
 
+bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
+  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
+  return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
+}
+
+bool SIPostRABundler::canBundle(const MachineInstr &MI,
+                                const MachineInstr &NextMI) const {
+  const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
+
+  return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
+          NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
+          ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
+          !isDependentLoad(NextMI));
+}
+
 bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
   TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   bool Changed = false;
-  const uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
-                            SIInstrFlags::SMRD | SIInstrFlags::DS |
-                            SIInstrFlags::FLAT | SIInstrFlags::MIMG;
-
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::instr_iterator Next;
     MachineBasicBlock::instr_iterator B = MBB.instr_begin();
     MachineBasicBlock::instr_iterator E = MBB.instr_end();
+
     for (auto I = B; I != E; I = Next) {
       Next = std::next(I);
+      if (!isBundleCandidate(*I))
+        continue;
 
-      const uint64_t IMemFlags = I->getDesc().TSFlags & MemFlags;
-
-      if (IMemFlags == 0 || I->isBundled() || !I->mayLoadOrStore() ||
-          B->mayLoad() != I->mayLoad() || B->mayStore() != I->mayStore() ||
-          ((B->getDesc().TSFlags & MemFlags) != IMemFlags) ||
-          isDependentLoad(*I)) {
-
-        if (B != I) {
-          if (std::next(B) != I) {
-            finalizeBundle(MBB, B, I);
-            Changed = true;
-          }
-          Next = I;
+      assert(Defs.empty());
+
+      if (I->getNumExplicitDefs() != 0)
+        Defs.insert(I->defs().begin()->getReg());
+
+      MachineBasicBlock::instr_iterator BundleStart = I;
+      MachineBasicBlock::instr_iterator BundleEnd = I;
+      unsigned ClauseLength = 1;
+      for (I = Next; I != E; I = Next) {
+        Next = std::next(I);
+
+        assert(BundleEnd != I);
+        if (canBundle(*BundleEnd, *I)) {
+          BundleEnd = I;
+          if (I->getNumExplicitDefs() != 0)
+            Defs.insert(I->defs().begin()->getReg());
+          ++ClauseLength;
+        } else if (!I->isMetaInstruction()) {
+          // Allow meta instructions in between bundle candidates, but do not
+          // start or end a bundle on one.
+          //
+          // TODO: It may be better to move meta instructions like dbg_value
+          // after the bundle. We're relying on the memory legalizer to unbundle
+          // these.
+          break;
         }
-
-        B = Next;
-        Defs.clear();
-        continue;
       }
 
-      if (I->getNumExplicitDefs() == 0)
-        continue;
-
-      Defs.insert(I->defs().begin()->getReg());
-    }
+      Next = std::next(BundleEnd);
+      if (ClauseLength > 1) {
+        Changed = true;
+        finalizeBundle(MBB, BundleStart, Next);
+      }
 
-    if (B != E && std::next(B) != E) {
-      finalizeBundle(MBB, B, E);
-      Changed = true;
+      Defs.clear();
     }
-
-    Defs.clear();
   }
 
   return Changed;

diff  --git a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
new file mode 100644
index 000000000000..8daa7eeeaff6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+xnack -amdgpu-max-memory-clause=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Test the behavior of the post-RA soft clause bundler in the presence
+; of debug info. The debug info should not interfere with the
+; bundling, which could result in an observable codegen change.
+
+define amdgpu_kernel void @dbg_clause(float addrspace(1)* %out, float addrspace(1)* %aptr) !dbg !4 {
+; GCN-LABEL: dbg_clause:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_dword v1, v0, s[2:3]
+; GCN-NEXT:    ;DEBUG_VALUE: foo:a <- $vgpr1
+; GCN-NEXT:    global_load_dword v2, v0, s[2:3] offset:32
+; GCN-NEXT:    ;DEBUG_VALUE: foo:b <- $vgpr2
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_f32_e32 v1, v1, v2
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 8
+  %a = load float, float addrspace(1)* %gep0, align 4
+  call void @llvm.dbg.value(metadata float %a, metadata !8, metadata !DIExpression()), !dbg !9
+  %b = load float, float addrspace(1)* %gep1, align 4
+  call void @llvm.dbg.value(metadata float %b, metadata !10, metadata !DIExpression()), !dbg !11
+  %fadd = fadd float %a, %b
+  store float %fadd, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone speculatable willreturn }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "/tmp/foo.cl", directory: "/dev/null")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "float", size: 32, align: 32)
+!8 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1)
+!9 = !DILocation(line: 1, column: 42, scope: !4)
+!10 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 2)
+!11 = !DILocation(line: 2, column: 42, scope: !4)

diff  --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index 3202fad8f70e..fca771680713 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -56,12 +56,12 @@ body:             |
     ; GCN:   BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     ; GCN: }
     ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
-    ; GCN:   $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN:   $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    ; GCN:   $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+    ; GCN:   $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
     ; GCN: }
     ; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
-    ; GCN:   IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
-    ; GCN:   IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+    ; GCN:   IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+    ; GCN:   IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
     ; GCN: }
     ; GCN: S_NOP 0
     ; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
@@ -112,3 +112,111 @@ body:             |
     $vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
     $vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
 ...
+
+# Middle dbg_value should be bundled
+---
+name: bundle_dbg_value_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN-LABEL: name: bundle_dbg_value_0
+    ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   DBG_VALUE internal $vgpr0, 0, 0
+    ; GCN:   $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    ; GCN: }
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    DBG_VALUE $vgpr0, 0, 0
+    $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...
+
+# Middle dbg_value should be bundled
+---
+name: bundle_dbg_value_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+    ; GCN-LABEL: name: bundle_dbg_value_1
+    ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+    ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   DBG_VALUE internal $vgpr0, 0, 0
+    ; GCN:   DBG_VALUE $vgpr1, 0, 0
+    ; GCN:   $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    ; GCN: }
+    ; GCN: DBG_VALUE $vgpr2, 0, 0
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    DBG_VALUE $vgpr0, 0, 0
+    DBG_VALUE $vgpr1, 0, 0
+    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    DBG_VALUE $vgpr2, 0, 0
+...
+
+# Starting and ending dbg_values should not be in the bundle
+---
+name: bundle_dbg_value_2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+    ; GCN-LABEL: name: bundle_dbg_value_2
+    ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+    ; GCN: DBG_VALUE $vgpr1, 0, 0
+    ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   DBG_VALUE internal $vgpr0, 0, 0
+    ; GCN:   $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    ; GCN: }
+    ; GCN: DBG_VALUE $vgpr2, 0, 0
+    DBG_VALUE $vgpr1, 0, 0
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    DBG_VALUE $vgpr0, 0, 0
+    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    DBG_VALUE $vgpr2, 0, 0
+...
+
+---
+name: bundle_kill
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN-LABEL: name: bundle_kill
+    ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   KILL $vgpr1
+    ; GCN:   $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    ; GCN: }
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    KILL $vgpr1
+    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...
+
+---
+name: bundle_kill_def_in_bundle
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN-LABEL: name: bundle_kill_def_in_bundle
+    ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+    ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   KILL internal $vgpr0
+    ; GCN:   $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+    ; GCN: }
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    KILL $vgpr0
+    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...


        


More information about the llvm-commits mailing list