[llvm] a7455d7 - AMDGPU: Remove kills following clusters of memory instruction
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 16 07:49:35 PST 2021
Author: Matt Arsenault
Date: 2021-02-16T10:49:28-05:00
New Revision: a7455d7b7ce33bb4d04106e26f242e6cb24f3042
URL: https://github.com/llvm/llvm-project/commit/a7455d7b7ce33bb4d04106e26f242e6cb24f3042
DIFF: https://github.com/llvm/llvm-project/commit/a7455d7b7ce33bb4d04106e26f242e6cb24f3042.diff
LOG: AMDGPU: Remove kills following clusters of memory instruction
In a future commit, soft clauses will be hinted with kill instructions
rather than forced together with bundles. Look for kills that look
like this, and erase them. I'm not sure if the check for specific uses
is worthwhile, or if it would be better to just unconditionally erase
kills.
This reduces test churn in a future patch.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index 2d220a0b71ac..1123a89b3e6a 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -48,6 +48,9 @@ class SIPostRABundler : public MachineFunctionPass {
SmallSet<Register, 16> Defs;
+ void collectUsedRegUnits(const MachineInstr &MI,
+ BitVector &UsedRegUnits) const;
+
bool isBundleCandidate(const MachineInstr &MI) const;
bool isDependentLoad(const MachineInstr &MI) const;
bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
@@ -85,6 +88,21 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
return false;
}
+void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
+ BitVector &UsedRegUnits) const {
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.readsReg())
+ continue;
+
+ Register Reg = Op.getReg();
+ assert(!Op.getSubReg() &&
+ "subregister indexes should not be present after RA");
+
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.set(*Units);
+ }
+}
+
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
@@ -105,6 +123,9 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
return false;
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
+ BitVector KillUsedRegUnits(TRI->getNumRegUnits());
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator Next;
@@ -147,6 +168,34 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(BundleEnd);
if (ClauseLength > 1) {
Changed = true;
+
+ // Before register allocation, kills are inserted after potential soft
+ // clauses to hint register allocation. Look for kills that look like
+ // this, and erase them.
+ if (Next != E && Next->isKill()) {
+ MachineInstr &Kill = *Next;
+
+ // TODO: Should maybe back-propagate kill flags to the bundle.
+ for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
+ collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
+ collectUsedRegUnits(Kill, KillUsedRegUnits);
+
+ BundleUsedRegUnits.flip();
+ KillUsedRegUnits &= BundleUsedRegUnits;
+
+ // Erase the kill if it's a subset of the used registers.
+ //
+ // TODO: Should we just remove all kills? Is there any real reason to
+ // keep them after RA?
+ if (KillUsedRegUnits.none()) {
+ ++Next;
+ Kill.eraseFromParent();
+ }
+
+ BundleUsedRegUnits.reset();
+ KillUsedRegUnits.reset();
+ }
+
finalizeBundle(MBB, BundleStart, Next);
}
diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
index fca771680713..c135a77163ef 100644
--- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
+++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir
@@ -220,3 +220,61 @@ body: |
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
...
+
+# Before register allocation, KILL hints are inserted after potential soft
+# clauses to hint the register allocator to not clobber the input
+# registers. Kills that look like this should be erased.
+---
+name: post_bundle_kill
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+
+ ; GCN-LABEL: name: post_bundle_kill
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
+...
+
+# Kill some other register not used in the bundle, should not be touched.
+---
+name: post_bundle_kill_other
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: post_bundle_kill_other
+ ; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: KILL killed $vgpr7
+ $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr7
+...
+
+# Kill some other register not used in the bundle, but also some
+# from the bundle.
+---
+name: post_bundle_kill_plus_other
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
+ ; GCN-LABEL: name: post_bundle_kill_plus_other
+ ; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: KILL killed $vgpr7, killed $vgpr3
+ $vgpr7 = V_MOV_B32_e32 0, implicit $exec
+ $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
+ KILL killed $vgpr7, killed $vgpr3
+...
More information about the llvm-commits
mailing list