[llvm] [clang] [clang-tools-extra] [AMDGPU] Fix GCNUpwardRPTracker. (WIP) (PR #71186)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 10 02:54:04 PST 2023
https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/71186
>From e2254cb744adb65685edeb2373f900368c11c11a Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 2 Nov 2023 20:39:11 +0100
Subject: [PATCH 1/3] [AMDGPU] Fix GCNUpwardRPTracker.
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 4 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 135 ++++++++---------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 47 ++++--
.../CodeGen/AMDGPU/regpressure_printer.mir | 137 +++++++++++-------
4 files changed, 190 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d89c9b1febded0f..cdc9de7f65e3e50 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -251,7 +251,7 @@ GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
assert(UPTracker.isValid() ||
(dbgs() << "Tracked region ",
printRegion(dbgs(), Begin, End, LIS), false));
- return UPTracker.moveMaxPressure();
+ return UPTracker.getMaxPressureAndReset();
}
// returns max pressure for a tentative schedule
@@ -272,7 +272,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
RPTracker.recede(*getMachineInstr(*--I));
}
- return RPTracker.moveMaxPressure();
+ return RPTracker.getMaxPressureAndReset();
}
void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a04c470b7b9762f..f191f3f08c56c6d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -166,66 +166,62 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO,
MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
}
-static LaneBitmask getUsedRegMask(const MachineOperand &MO,
- const MachineRegisterInfo &MRI,
- const LiveIntervals &LIS) {
- assert(MO.isUse() && MO.isReg() && MO.getReg().isVirtual());
-
- if (auto SubReg = MO.getSubReg())
- return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
-
- auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
- if (SIRegisterInfo::getNumCoveredRegs(MaxMask) > 1) // cannot have subregs
- return MaxMask;
-
- // For a tentative schedule LIS isn't updated yet but livemask should remain
- // the same on any schedule. Subreg defs can be reordered but they all must
- // dominate uses anyway.
- auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
- return getLiveLaneMask(MO.getReg(), SI, LIS, MRI);
-}
-
-static SmallVector<RegisterMaskPair, 8>
-collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
+static void
+collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
+ const MachineInstr &MI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
- SmallVector<RegisterMaskPair, 8> Res;
+ SlotIndex InstrSI;
for (const auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (!MO.isUse() || !MO.readsReg())
continue;
- auto const UsedMask = getUsedRegMask(MO, MRI, LIS);
+ Register Reg = MO.getReg();
+ auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+ return RM.RegUnit == Reg;
+ });
+ if (I != RegMaskPairs.end())
+ continue;
+
+ LaneBitmask UseMask;
+ auto &LI = LIS.getInterval(Reg);
+ if (!LI.hasSubRanges())
+ UseMask = MRI.getMaxLaneMaskForVReg(Reg);
+ else {
+ // For a tentative schedule LIS isn't updated yet but livemask should
+ // remain the same on any schedule. Subreg defs can be reordered but they
+ // all must dominate uses anyway.
+ if (!InstrSI)
+ InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+ UseMask = getLiveLaneMask(LI, InstrSI, MRI);
+ }
- auto Reg = MO.getReg();
- auto I = llvm::find_if(
- Res, [Reg](const RegisterMaskPair &RM) { return RM.RegUnit == Reg; });
- if (I != Res.end())
- I->LaneMask |= UsedMask;
- else
- Res.push_back(RegisterMaskPair(Reg, UsedMask));
+ RegMaskPairs.emplace_back(Reg, UseMask);
}
- return Res;
}
///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker
-LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
- SlotIndex SI,
+LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI) {
+ return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
+}
+
+LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+ const MachineRegisterInfo &MRI) {
LaneBitmask LiveMask;
- const auto &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
for (const auto &S : LI.subranges())
if (S.liveAt(SI)) {
LiveMask |= S.LaneMask;
- assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
- LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
+ assert(LiveMask < MRI.getMaxLaneMaskForVReg(LI.reg()) ||
+ LiveMask == MRI.getMaxLaneMaskForVReg(LI.reg()));
}
} else if (LI.liveAt(SI)) {
- LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
+ LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
}
return LiveMask;
}
@@ -261,15 +257,11 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
-void GCNUpwardRPTracker::reset(const MachineInstr &MI,
- const LiveRegSet *LiveRegsCopy) {
- GCNRPTracker::reset(MI, LiveRegsCopy, true);
-}
-
void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
+ LastTrackedMI = nullptr; // TODO: LastTrackedMI isnt' used, remove?
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
@@ -281,38 +273,49 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI);
-
- // calc pressure at the MI (defs + uses)
- auto AtMIPressure = CurPressure;
- for (const auto &U : RegUses) {
- auto LiveMask = LiveRegs[U.RegUnit];
- AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
- }
- // update max pressure
- MaxPressure = max(AtMIPressure, MaxPressure);
-
- for (const auto &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual() || MO.isDead())
- continue;
-
- auto Reg = MO.getReg();
+ auto DecrementDef = [this](const MachineOperand &MO) {
+ Register Reg = MO.getReg();
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
- continue;
- auto &LiveMask = I->second;
- auto PrevMask = LiveMask;
+ return;
+
+ LaneBitmask &LiveMask = I->second;
+ LaneBitmask PrevMask = LiveMask;
LiveMask &= ~getDefRegMask(MO, *MRI);
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
+ };
+
+ // Decrement non-early-clobber defs.
+ SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+ if (!MO.isEarlyClobber())
+ DecrementDef(MO);
+ else
+ EarlyClobberDefs.push_back(&MO);
}
- for (const auto &U : RegUses) {
- auto &LiveMask = LiveRegs[U.RegUnit];
- auto PrevMask = LiveMask;
+
+ // Increment uses.
+ SmallVector<RegisterMaskPair, 8> RegUses;
+ collectVirtualRegUses(RegUses, MI, LIS, *MRI);
+ for (const RegisterMaskPair &U : RegUses) {
+ LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
+ LaneBitmask PrevMask = LiveMask;
LiveMask |= U.LaneMask;
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
}
+
+ // Point of maximum pressure: non-early-clobber defs are decremented and uses
+ // are incremented.
+ MaxPressure = max(CurPressure, MaxPressure);
+
+ // Now decrement early clobber defs.
+ for (const MachineOperand *MO : EarlyClobberDefs)
+ DecrementDef(*MO);
+
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
@@ -562,15 +565,15 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
} else {
GCNUpwardRPTracker RPT(LIS);
RPT.reset(MRI, MBBEndSlot);
- RPT.moveMaxPressure(); // Clear max pressure.
LiveOut = RPT.getLiveRegs();
RPAtMBBEnd = RPT.getPressure();
for (auto &MI : reverse(MBB)) {
+ RPT.resetMaxPressure();
RPT.recede(MI);
if (!MI.isDebugInstr())
- RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+ RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
}
LiveIn = RPT.getLiveRegs();
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c750fe74749e2b3..732ed33337d24dc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,7 +128,7 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
- GCNRegPressure getPressure() const { return CurPressure; }
+ const GCNRegPressure &getPressure() const { return CurPressure; }
// returns MaxPressure, resetting it
decltype(MaxPressure) moveMaxPressure() {
@@ -149,24 +149,46 @@ class GCNUpwardRPTracker : public GCNRPTracker {
public:
GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
- // reset tracker to the point just below MI
- // filling live regs upon this point using LIS
- void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
-
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
// reset tracker at the specified slot index.
- void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) {
- reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_));
+ void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
+ reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+ }
+
+ // reset tracker to the end of the MBB.
+ void reset(const MachineBasicBlock &MBB) {
+ reset(MBB.getParent()->getRegInfo(),
+ LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
+ }
+
+ // reset tracker to the point just after MI (in program order).
+ void reset(const MachineInstr &MI) {
+ reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
}
- // move to the state just above the MI
+ // move to the state just before the MI (in program order).
void recede(const MachineInstr &MI);
// checks whether the tracker's state after receding MI corresponds
- // to reported by LIS
+ // to reported by LIS.
bool isValid() const;
+
+ // deprecated.
+ decltype(MaxPressure) moveMaxPressure() = delete;
+
+ const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
+
+ void resetMaxPressure() {
+ MaxPressure = CurPressure;
+ }
+
+ GCNRegPressure getMaxPressureAndReset() {
+ GCNRegPressure RP = MaxPressure;
+ resetMaxPressure();
+ return RP;
+ }
};
class GCNDownwardRPTracker : public GCNRPTracker {
@@ -209,6 +231,13 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
+LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+ const MachineRegisterInfo &MRI);
+
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
/// creates a map MachineInstr -> LiveRegSet
/// R - range of iterators on instructions
/// After - upon entry or exit of every instruction
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index d53050167e98bef..bb889e48aa16895 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -136,54 +136,28 @@ body: |
name: upward_problem_lis_subregs_mismatch
tracksRegLiveness: true
body: |
- ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
- ; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 2
- ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F
- ; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C
- ; RPU-NEXT: %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F
- ; RPU-NEXT: %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 4
- ; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ;
- ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
- ; RPD-NEXT: 0 1
- ; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 2
- ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
+ ; RP-LABEL: name: upward_problem_lis_subregs_mismatch
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 2
+ ; RP-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
bb.0:
undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
@@ -217,13 +191,13 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
; RPU-NEXT: 0 8
- ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64
+ ; RPU-NEXT: 0 9 %5:vreg_64 = COPY %2:vreg_64
; RPU-NEXT: 0 9
; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
; RPU-NEXT: 0 8
; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
; RPU-NEXT: 0 7
- ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
@@ -247,16 +221,16 @@ body: |
; RPU-NEXT: 0 20
; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; RPU-NEXT: 0 21
- ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 21 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
; RPU-NEXT: 0 20
- ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+ ; RPU-NEXT: 0 20 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
; RPU-NEXT: DBG_VALUE
; RPU-NEXT: 0 20
; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
; RPU-NEXT: 0 16
- ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 16 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 15
- ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 15 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 14
; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: DBG_VALUE
@@ -266,7 +240,7 @@ body: |
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+ ; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
; RPU-NEXT: 0 5
@@ -459,4 +433,55 @@ body: |
S_NOP 0, implicit %16
S_ENDPGM 0
...
-
+---
+name: test_early_clobber_trivial
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; RP-LABEL: name: test_early_clobber_trivial
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 2 early-clobber %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+ ; RP-NEXT: 0 1
+ ; RP-NEXT: 0 1 S_NOP 0, implicit %1:vgpr_32
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ early-clobber %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+ S_NOP 0, implicit %1
+...
+---
+name: test_not_early_clobber_trivial
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; RPU-LABEL: name: test_not_early_clobber_trivial
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %1:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ;
+ ; RPD-LABEL: name: test_not_early_clobber_trivial
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 2 %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+ ; RPD-NEXT: 0 1
+ ; RPD-NEXT: 0 1 S_NOP 0, implicit %1:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+ S_NOP 0, implicit %1
+...
>From bfc74ff6c4a0040303da3cbf56229e17c0855009 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 7 Nov 2023 21:39:33 +0100
Subject: [PATCH 2/3] comments
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 20 ++++++++++-------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 26 +++++++++--------------
2 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f191f3f08c56c6d..6a0bc163646e1e4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -178,12 +178,11 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
continue;
Register Reg = MO.getReg();
- auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
- return RM.RegUnit == Reg;
- });
- if (I != RegMaskPairs.end())
+ if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+ return RM.RegUnit == Reg;
+ }))
continue;
-
+
LaneBitmask UseMask;
auto &LI = LIS.getInterval(Reg);
if (!LI.hasSubRanges())
@@ -217,8 +216,7 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
for (const auto &S : LI.subranges())
if (S.liveAt(SI)) {
LiveMask |= S.LaneMask;
- assert(LiveMask < MRI.getMaxLaneMaskForVReg(LI.reg()) ||
- LiveMask == MRI.getMaxLaneMaskForVReg(LI.reg()));
+ assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
}
} else if (LI.liveAt(SI)) {
LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
@@ -257,11 +255,14 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
- LastTrackedMI = nullptr; // TODO: LastTrackedMI isnt' used, remove?
+ LastTrackedMI = nullptr;
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}
@@ -319,6 +320,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *LiveRegsCopy) {
MRI = &MI.getParent()->getParent()->getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 732ed33337d24dc..754957daa4b4a91 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -130,13 +130,6 @@ class GCNRPTracker {
const GCNRegPressure &getPressure() const { return CurPressure; }
- // returns MaxPressure, resetting it
- decltype(MaxPressure) moveMaxPressure() {
- auto Res = MaxPressure;
- MaxPressure.clear();
- return Res;
- }
-
decltype(LiveRegs) moveLiveRegs() {
return std::move(LiveRegs);
}
@@ -175,16 +168,11 @@ class GCNUpwardRPTracker : public GCNRPTracker {
// to reported by LIS.
bool isValid() const;
- // deprecated.
- decltype(MaxPressure) moveMaxPressure() = delete;
-
const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
- void resetMaxPressure() {
- MaxPressure = CurPressure;
- }
+ void resetMaxPressure() { MaxPressure = CurPressure; }
- GCNRegPressure getMaxPressureAndReset() {
+ GCNRegPressure getMaxPressureAndReset() {
GCNRegPressure RP = MaxPressure;
resetMaxPressure();
return RP;
@@ -202,6 +190,13 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator getNext() const { return NextMI; }
+ // Return MaxPressure and clear it.
+ decltype(MaxPressure) moveMaxPressure() {
+ auto Res = MaxPressure;
+ MaxPressure.clear();
+ return Res;
+ }
+
// Reset tracker to the point before the MI
// filling live regs upon this point using LIS.
// Returns false if block is empty except debug values.
@@ -234,8 +229,7 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI);
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
/// creates a map MachineInstr -> LiveRegSet
>From c0e353f1b64a078fb61f387358b03094a13cd744 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Fri, 10 Nov 2023 11:42:34 +0100
Subject: [PATCH 3/3] comments
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 754957daa4b4a91..e21bf10d795ba52 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,7 +128,7 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
- const GCNRegPressure &getPressure() const { return CurPressure; }
+ GCNRegPressure getPressure() const { return CurPressure; }
decltype(LiveRegs) moveLiveRegs() {
return std::move(LiveRegs);
@@ -191,7 +191,7 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator getNext() const { return NextMI; }
// Return MaxPressure and clear it.
- decltype(MaxPressure) moveMaxPressure() {
+ GCNRegPressure moveMaxPressure() {
auto Res = MaxPressure;
MaxPressure.clear();
return Res;
More information about the llvm-commits
mailing list