[llvm] [AMDGPU] VGPR instruction placement should be aware of exec mask prol… (PR #175080)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 8 14:27:08 PST 2026
https://github.com/carlobertolli created https://github.com/llvm/llvm-project/pull/175080
…ogues.
This patch changes the insertion logic for VGPR-modifying instructions to skip to after the exec mask is reset, if such an instruction is present in the basic block. This prevents placement of scratch stores/loads for VGPRs before the exec mask is reset on AMDGPU targets.
>From de6a9674e195edf328738d395dc36a9a5d82e3a4 Mon Sep 17 00:00:00 2001
From: Carlo Bertolli <carlo.bertolli at amd.com>
Date: Wed, 7 Jan 2026 15:19:37 -0600
Subject: [PATCH] [AMDGPU] VGPR instruction placement should be aware of exec
mask prologues. This patch changes the insertion logic for VGPR-modifying
instructions to skip to after the exec mask is reset, if such an instruction
is present in the basic block. This prevents placement of scratch
stores/loads for VGPRs before the exec mask is reset on AMDGPU targets.
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 6 ++++
llvm/lib/CodeGen/SplitKit.cpp | 4 +--
llvm/lib/CodeGen/TargetInstrInfo.cpp | 6 ++++
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 38 +++++++++++++++++++++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 ++
5 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 11adc190b2a62..6aa42cdaf64ff 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2166,6 +2166,12 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
return false;
}
+ /// Get the insertion point for a live range split copy at the beginning of
+ /// a basic block. This is used by the register allocator's live range
+ /// splitting logic.
+ virtual MachineBasicBlock::iterator
+ getExecAwareInsertPoint(MachineBasicBlock &MBB, Register Reg) const;
+
/// Allows targets to use appropriate copy instruction while spilitting live
/// range of a register in register allocation.
virtual unsigned getLiveRangeSplitOpcode(Register Reg,
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 8ec4bfbb5a330..4b8bec7a7ac96 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -842,8 +842,8 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
unsigned RegIdx = 0;
Register Reg = LIS.getInterval(Edit->get(RegIdx)).reg();
- VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB,
- MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg));
+ MachineBasicBlock::iterator InsertPt = TII.getExecAwareInsertPoint(MBB, Reg);
+ VNInfo *VNI = defFromParent(RegIdx, ParentVNI, Start, MBB, InsertPt);
RegAssign.insert(Start, VNI->def, OpenIdx);
LLVM_DEBUG(dump());
return VNI->def;
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index fef3a3663d3a8..10e24b4b3b127 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -2222,3 +2222,9 @@ bool TargetInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
}
+
+MachineBasicBlock::iterator
+TargetInstrInfo::getExecAwareInsertPoint(MachineBasicBlock &MBB,
+ Register Reg) const {
+ return MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 87278f74cea8d..869487a466aca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9903,6 +9903,44 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
+MachineBasicBlock::iterator
+SIInstrInfo::getExecAwareInsertPoint(MachineBasicBlock &MBB,
+ Register Reg) const {
+ if (!Reg)
+ return MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+
+ // Instructions using SGPRS are independent of exec mask: skip.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = Reg.isPhysical() ? RI.getPhysRegBaseClass(Reg)
+ : MRI.getRegClassOrNull(Reg);
+ if (RC && RI.isSGPRClass(RC))
+ return MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg);
+
+ // For VGPRs, find a safe insert point after exec has been restored.
+ // A standard basic block prologue pattern is:
+ // BB:
+ // <PHIs>
+ // <labels>
+ // <SGPR spills/restores>
+ // <scalar copies - live range splits for SGPRs>
+ // s_or_b32 exec_lo, exec_lo, s_saved
+
+ // Look for exec restore instruction: it could be after the first SGPR copy.
+ MachineBasicBlock::iterator I = MBB.SkipPHIsLabelsAndDebug(MBB.begin(), Reg);
+ MachineBasicBlock::iterator E = MBB.end();
+
+ for (MachineBasicBlock::iterator It = I; It != E; ++It) {
+ if (!It->isTerminator() && It->modifiesRegister(AMDGPU::EXEC, &RI))
+ return std::next(It);
+
+ if (It->mayLoadOrStore() && !isSGPRSpill(It->getOpcode()) &&
+ !isWWMRegSpillOpcode(It->getOpcode()))
+ break;
+ }
+
+ return I;
+}
+
MachineInstrBuilder
SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 9373cdb199e29..d5ac314ae899a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1579,6 +1579,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isBasicBlockPrologue(const MachineInstr &MI,
Register Reg = Register()) const override;
+ MachineBasicBlock::iterator
+ getExecAwareInsertPoint(MachineBasicBlock &MBB, Register Reg) const override;
+
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
const DebugLoc &DL, Register Src,
More information about the llvm-commits
mailing list