[llvm] 5cb6b15 - [AMDGPU] Constrain use LiveMask by the operand's LaneMask for RP calculation.
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 10:34:32 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-10-08T10:29:50-07:00
New Revision: 5cb6b15568c1554a2ea89fee849ed0e27b785ee0
URL: https://github.com/llvm/llvm-project/commit/5cb6b15568c1554a2ea89fee849ed0e27b785ee0
DIFF: https://github.com/llvm/llvm-project/commit/5cb6b15568c1554a2ea89fee849ed0e27b785ee0.diff
LOG: [AMDGPU] Constrain use LiveMask by the operand's LaneMask for RP calculation.
For speculative RP queries, recede may calculate inaccurate masks for subreg uses. Previously, the calculation would look at any live lane for the use at the position of the MI in the LIS. This also adds lanes for any subregs which are live at, but not used by, the instruction. By constraining against the getSubRegIndexLaneMask for the operand's subreg, we are sure not to pick up these extra lanes.
For current clients of recede, this is not an issue, because (1) the current clients do not violate the program order in the LIS, and (2) the change to RP is based on the difference between the previous mask and the new mask. Since current clients are not exposed to this issue, this patch is effectively NFC for them.
Co-authored-by: Valery Pykhtin &lt;Valery.Pykhtin at amd.com&gt;
Change-Id: Iaed80271226b2587297e6fb78fe081afec1a9275
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
llvm/lib/Target/AMDGPU/GCNRegPressure.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index cb0624f11592d2..7c633b2bce7bc2 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -259,7 +259,8 @@ static void
collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
const MachineInstr &MI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
- SlotIndex InstrSI;
+
+ auto &TRI = *MRI.getTargetRegisterInfo();
for (const auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
@@ -267,25 +268,31 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
continue;
Register Reg = MO.getReg();
- if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
- return RM.RegUnit == Reg;
- }))
- continue;
+ auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+ return RM.RegUnit == Reg;
+ });
+
+ auto &P = I == RegMaskPairs.end()
+ ? RegMaskPairs.emplace_back(Reg, LaneBitmask::getNone())
+ : *I;
- LaneBitmask UseMask;
- auto &LI = LIS.getInterval(Reg);
+ P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(MO.getSubReg())
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ }
+
+ SlotIndex InstrSI;
+ for (auto &P : RegMaskPairs) {
+ auto &LI = LIS.getInterval(P.RegUnit);
if (!LI.hasSubRanges())
- UseMask = MRI.getMaxLaneMaskForVReg(Reg);
- else {
- // For a tentative schedule LIS isn't updated yet but livemask should
- // remain the same on any schedule. Subreg defs can be reordered but they
- // all must dominate uses anyway.
- if (!InstrSI)
- InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
- UseMask = getLiveLaneMask(LI, InstrSI, MRI);
- }
+ continue;
+
+ // For a tentative schedule LIS isn't updated yet but livemask should
+ // remain the same on any schedule. Subreg defs can be reordered but they
+ // all must dominate uses anyway.
+ if (!InstrSI)
+ InstrSI = LIS.getInstructionIndex(MI).getBaseIndex();
- RegMaskPairs.emplace_back(Reg, UseMask);
+ P.LaneMask = getLiveLaneMask(LI, InstrSI, MRI, P.LaneMask);
}
}
@@ -294,22 +301,25 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI) {
- return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
+ const MachineRegisterInfo &MRI,
+ LaneBitmask LaneMaskFilter) {
+ return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
}
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
- const MachineRegisterInfo &MRI) {
+ const MachineRegisterInfo &MRI,
+ LaneBitmask LaneMaskFilter) {
LaneBitmask LiveMask;
if (LI.hasSubRanges()) {
for (const auto &S : LI.subranges())
- if (S.liveAt(SI)) {
+ if ((S.LaneMask & LaneMaskFilter).any() && S.liveAt(SI)) {
LiveMask |= S.LaneMask;
assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
}
} else if (LI.liveAt(SI)) {
LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
}
+ LiveMask &= LaneMaskFilter;
return LiveMask;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 54dc1972d27619..d419fcc802c60a 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -265,13 +265,18 @@ class GCNDownwardRPTracker : public GCNRPTracker {
const LiveRegSet *LiveRegsCopy = nullptr);
};
-LaneBitmask getLiveLaneMask(unsigned Reg,
- SlotIndex SI,
+/// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the
+/// active lanes of \p LaneMaskFilter will be set in the return value. This is
+/// used, for example, to limit the live lanes to a specific subreg when
+/// calculating use masks.
+LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
More information about the llvm-commits
mailing list