[llvm] c320e81 - AMDGPU: Fix debug info handling in post-RA bundler
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 16 07:42:13 PST 2021
Author: Matt Arsenault
Date: 2021-02-16T10:42:06-05:00
New Revision: c320e8196ae67a4eaa72253ed9013dda28eaf390
URL: https://github.com/llvm/llvm-project/commit/c320e8196ae67a4eaa72253ed9013dda28eaf390
DIFF: https://github.com/llvm/llvm-project/commit/c320e8196ae67a4eaa72253ed9013dda28eaf390.diff
LOG: AMDGPU: Fix debug info handling in post-RA bundler
This was allowing debug instructions to break the bundling, which
would change scheduling behavior. Bundle debug info / kills inside
the bundle. This seems to work OK, although the asm printer doesn't
understand these in a bundle. This implicitly expects the memory
legalizer to unbundle. It would probably be slightly nicer to move
these after.
Rewrite the loop to be clearer and make sure we don't end a bundle on
a meta instruction, only allow them in between other valid bundle
instructions.
Added:
llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
Modified:
llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index ab05081e55d5..2d220a0b71ac 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -48,10 +48,15 @@ class SIPostRABundler : public MachineFunctionPass {
SmallSet<Register, 16> Defs;
+ bool isBundleCandidate(const MachineInstr &MI) const;
bool isDependentLoad(const MachineInstr &MI) const;
-
+ bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
};
+constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
+ SIInstrFlags::SMRD | SIInstrFlags::DS |
+ SIInstrFlags::FLAT | SIInstrFlags::MIMG;
+
} // End anonymous namespace.
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)
@@ -80,55 +85,73 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
return false;
}
+bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
+ const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
+ return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
+}
+
+bool SIPostRABundler::canBundle(const MachineInstr &MI,
+ const MachineInstr &NextMI) const {
+ const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
+
+ return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
+ NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
+ ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
+ !isDependentLoad(NextMI));
+}
+
bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
bool Changed = false;
- const uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
- SIInstrFlags::SMRD | SIInstrFlags::DS |
- SIInstrFlags::FLAT | SIInstrFlags::MIMG;
-
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator Next;
MachineBasicBlock::instr_iterator B = MBB.instr_begin();
MachineBasicBlock::instr_iterator E = MBB.instr_end();
+
for (auto I = B; I != E; I = Next) {
Next = std::next(I);
+ if (!isBundleCandidate(*I))
+ continue;
- const uint64_t IMemFlags = I->getDesc().TSFlags & MemFlags;
-
- if (IMemFlags == 0 || I->isBundled() || !I->mayLoadOrStore() ||
- B->mayLoad() != I->mayLoad() || B->mayStore() != I->mayStore() ||
- ((B->getDesc().TSFlags & MemFlags) != IMemFlags) ||
- isDependentLoad(*I)) {
-
- if (B != I) {
- if (std::next(B) != I) {
- finalizeBundle(MBB, B, I);
- Changed = true;
- }
- Next = I;
+ assert(Defs.empty());
+
+ if (I->getNumExplicitDefs() != 0)
+ Defs.insert(I->defs().begin()->getReg());
+
+ MachineBasicBlock::instr_iterator BundleStart = I;
+ MachineBasicBlock::instr_iterator BundleEnd = I;
+ unsigned ClauseLength = 1;
+ for (I = Next; I != E; I = Next) {
+ Next = std::next(I);
+
+ assert(BundleEnd != I);
+ if (canBundle(*BundleEnd, *I)) {
+ BundleEnd = I;
+ if (I->getNumExplicitDefs() != 0)
+ Defs.insert(I->defs().begin()->getReg());
+ ++ClauseLength;
+ } else if (!I->isMetaInstruction()) {
+ // Allow meta instructions in between bundle candidates, but do not
+ // start or end a bundle on one.
+ //
+ // TODO: It may be better to move meta instructions like dbg_value
+ // after the bundle. We're relying on the memory legalizer to unbundle
+ // these.
+ break;
}
-
- B = Next;
- Defs.clear();
- continue;
}
- if (I->getNumExplicitDefs() == 0)
- continue;
-
- Defs.insert(I->defs().begin()->getReg());
- }
+ Next = std::next(BundleEnd);
+ if (ClauseLength > 1) {
+ Changed = true;
+ finalizeBundle(MBB, BundleStart, Next);
+ }
- if (B != E && std::next(B) != E) {
- finalizeBundle(MBB, B, E);
- Changed = true;
+ Defs.clear();
}
-
- Defs.clear();
}
return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
new file mode 100644
index 000000000000..8daa7eeeaff6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+xnack -amdgpu-max-memory-clause=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Test the behavior of the post-RA soft clause bundler in the presence
+; of debug info. The debug info should not interfere with the
+; bundling, which could result in an observable codegen change.
+
+define amdgpu_kernel void @dbg_clause(float addrspace(1)* %out, float addrspace(1)* %aptr) !dbg !4 {
+; GCN-LABEL: dbg_clause:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: global_load_dword v1, v0, s[2:3]
+; GCN-NEXT: ;DEBUG_VALUE: foo:a <- $vgpr1
+; GCN-NEXT: global_load_dword v2, v0, s[2:3] offset:32
+; GCN-NEXT: ;DEBUG_VALUE: foo:b <- $vgpr2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_f32_e32 v1, v1, v2
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 8
+ %a = load float, float addrspace(1)* %gep0, align 4
+ call void @llvm.dbg.value(metadata float %a, metadata !8, metadata !DIExpression()), !dbg !9
+ %b = load float, float addrspace(1)* %gep1, align 4
+ call void @llvm.dbg.value(metadata float %b, metadata !10, metadata !DIExpression()), !dbg !11
+ %fadd = fadd float %a, %b
+ store float %fadd, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone speculatable willreturn }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "/tmp/foo.cl", directory: "/dev/null")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "float", size: 32, align: 32)
+!8 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1)
+!9 = !DILocation(line: 1, column: 42, scope: !4)
+!10 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 2)
+!11 = !DILocation(line: 2, column: 42, scope: !4)
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index 3202fad8f70e..fca771680713 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -56,12 +56,12 @@ body: |
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
- ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
+ ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
; GCN: }
; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
- ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+ ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
+ ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
; GCN: }
; GCN: S_NOP 0
; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
@@ -112,3 +112,111 @@ body: |
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
...
+
+# Middle dbg_value should be bundled
+---
+name: bundle_dbg_value_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: bundle_dbg_value_0
+ ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: DBG_VALUE internal $vgpr0, 0, 0
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ DBG_VALUE $vgpr0, 0, 0
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...
+
+# Middle dbg_value should be bundled
+---
+name: bundle_dbg_value_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+ ; GCN-LABEL: name: bundle_dbg_value_1
+ ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: DBG_VALUE internal $vgpr0, 0, 0
+ ; GCN: DBG_VALUE $vgpr1, 0, 0
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: DBG_VALUE $vgpr2, 0, 0
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ DBG_VALUE $vgpr0, 0, 0
+ DBG_VALUE $vgpr1, 0, 0
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ DBG_VALUE $vgpr2, 0, 0
+...
+
+# Starting and ending dbg_values should not be in the bundle
+---
+name: bundle_dbg_value_2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+ ; GCN-LABEL: name: bundle_dbg_value_2
+ ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
+ ; GCN: DBG_VALUE $vgpr1, 0, 0
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: DBG_VALUE internal $vgpr0, 0, 0
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: DBG_VALUE $vgpr2, 0, 0
+ DBG_VALUE $vgpr1, 0, 0
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ DBG_VALUE $vgpr0, 0, 0
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ DBG_VALUE $vgpr2, 0, 0
+...
+
+---
+name: bundle_kill
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: bundle_kill
+ ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: KILL $vgpr1
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ KILL $vgpr1
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...
+
+---
+name: bundle_kill_def_in_bundle
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: bundle_kill_def_in_bundle
+ ; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: KILL internal $vgpr0
+ ; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ KILL $vgpr0
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+
+...
More information about the llvm-commits
mailing list