[llvm] [AMDGPU] Fix GCNUpwardRPTracker. (WIP) (PR #71186)

Valery Pykhtin via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 7 12:49:23 PST 2023


https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/71186

>From e2254cb744adb65685edeb2373f900368c11c11a Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Thu, 2 Nov 2023 20:39:11 +0100
Subject: [PATCH 1/2] [AMDGPU] Fix GCNUpwardRPTracker.

---
 .../Target/AMDGPU/GCNIterativeScheduler.cpp   |   4 +-
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp     | 135 ++++++++---------
 llvm/lib/Target/AMDGPU/GCNRegPressure.h       |  47 ++++--
 .../CodeGen/AMDGPU/regpressure_printer.mir    | 137 +++++++++++-------
 4 files changed, 190 insertions(+), 133 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d89c9b1febded0f..cdc9de7f65e3e50 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -251,7 +251,7 @@ GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
   assert(UPTracker.isValid() ||
          (dbgs() << "Tracked region ",
           printRegion(dbgs(), Begin, End, LIS), false));
-  return UPTracker.moveMaxPressure();
+  return UPTracker.getMaxPressureAndReset();
 }
 
 // returns max pressure for a tentative schedule
@@ -272,7 +272,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
   for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
     RPTracker.recede(*getMachineInstr(*--I));
   }
-  return RPTracker.moveMaxPressure();
+  return RPTracker.getMaxPressureAndReset();
 }
 
 void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a04c470b7b9762f..f191f3f08c56c6d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -166,66 +166,62 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO,
     MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
 }
 
-static LaneBitmask getUsedRegMask(const MachineOperand &MO,
-                                  const MachineRegisterInfo &MRI,
-                                  const LiveIntervals &LIS) {
-  assert(MO.isUse() && MO.isReg() && MO.getReg().isVirtual());
-
-  if (auto SubReg = MO.getSubReg())
-    return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
-
-  auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
-  if (SIRegisterInfo::getNumCoveredRegs(MaxMask) > 1) // cannot have subregs
-    return MaxMask;
-
-  // For a tentative schedule LIS isn't updated yet but livemask should remain
-  // the same on any schedule. Subreg defs can be reordered but they all must
-  // dominate uses anyway.
-  auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
-  return getLiveLaneMask(MO.getReg(), SI, LIS, MRI);
-}
-
-static SmallVector<RegisterMaskPair, 8>
-collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS,
+static void
+collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
+                      const MachineInstr &MI, const LiveIntervals &LIS,
                       const MachineRegisterInfo &MRI) {
-  SmallVector<RegisterMaskPair, 8> Res;
+  SlotIndex InstrSI;
   for (const auto &MO : MI.operands()) {
     if (!MO.isReg() || !MO.getReg().isVirtual())
       continue;
     if (!MO.isUse() || !MO.readsReg())
       continue;
 
-    auto const UsedMask = getUsedRegMask(MO, MRI, LIS);
+    Register Reg = MO.getReg();
+    auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+      return RM.RegUnit == Reg;
+    });
+    if (I != RegMaskPairs.end())
+      continue;
+      
+    LaneBitmask UseMask;
+    auto &LI = LIS.getInterval(Reg);
+    if (!LI.hasSubRanges())
+      UseMask = MRI.getMaxLaneMaskForVReg(Reg);
+    else {
+      // For a tentative schedule LIS isn't updated yet but livemask should
+      // remain the same on any schedule. Subreg defs can be reordered but they
+      // all must dominate uses anyway.
+      if (!InstrSI)
+        InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+      UseMask = getLiveLaneMask(LI, InstrSI, MRI);
+    }
 
-    auto Reg = MO.getReg();
-    auto I = llvm::find_if(
-        Res, [Reg](const RegisterMaskPair &RM) { return RM.RegUnit == Reg; });
-    if (I != Res.end())
-      I->LaneMask |= UsedMask;
-    else
-      Res.push_back(RegisterMaskPair(Reg, UsedMask));
+    RegMaskPairs.emplace_back(Reg, UseMask);
   }
-  return Res;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // GCNRPTracker
 
-LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
-                                  SlotIndex SI,
+LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
                                   const LiveIntervals &LIS,
                                   const MachineRegisterInfo &MRI) {
+  return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
+}
+
+LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+                                  const MachineRegisterInfo &MRI) {
   LaneBitmask LiveMask;
-  const auto &LI = LIS.getInterval(Reg);
   if (LI.hasSubRanges()) {
     for (const auto &S : LI.subranges())
       if (S.liveAt(SI)) {
         LiveMask |= S.LaneMask;
-        assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
-               LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
+        assert(LiveMask < MRI.getMaxLaneMaskForVReg(LI.reg()) ||
+               LiveMask == MRI.getMaxLaneMaskForVReg(LI.reg()));
       }
   } else if (LI.liveAt(SI)) {
-    LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
+    LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
   }
   return LiveMask;
 }
@@ -261,15 +257,11 @@ void GCNRPTracker::reset(const MachineInstr &MI,
   MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
 }
 
-void GCNUpwardRPTracker::reset(const MachineInstr &MI,
-                               const LiveRegSet *LiveRegsCopy) {
-  GCNRPTracker::reset(MI, LiveRegsCopy, true);
-}
-
 void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
                                const LiveRegSet &LiveRegs_) {
   MRI = &MRI_;
   LiveRegs = LiveRegs_;
+  LastTrackedMI = nullptr; // TODO: LastTrackedMI isnt' used, remove?
   MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
 }
 
@@ -281,38 +273,49 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   if (MI.isDebugInstr())
     return;
 
-  auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI);
-
-  // calc pressure at the MI (defs + uses)
-  auto AtMIPressure = CurPressure;
-  for (const auto &U : RegUses) {
-    auto LiveMask = LiveRegs[U.RegUnit];
-    AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI);
-  }
-  // update max pressure
-  MaxPressure = max(AtMIPressure, MaxPressure);
-
-  for (const auto &MO : MI.all_defs()) {
-    if (!MO.getReg().isVirtual() || MO.isDead())
-      continue;
-
-    auto Reg = MO.getReg();
+  auto DecrementDef = [this](const MachineOperand &MO) {
+    Register Reg = MO.getReg();
     auto I = LiveRegs.find(Reg);
     if (I == LiveRegs.end())
-      continue;
-    auto &LiveMask = I->second;
-    auto PrevMask = LiveMask;
+      return;
+
+    LaneBitmask &LiveMask = I->second;
+    LaneBitmask PrevMask = LiveMask;
     LiveMask &= ~getDefRegMask(MO, *MRI);
     CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
     if (LiveMask.none())
       LiveRegs.erase(I);
+  };
+
+  // Decrement non-early-clobber defs.
+  SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
+  for (const MachineOperand &MO : MI.all_defs()) {
+    if (!MO.getReg().isVirtual())
+      continue;
+    if (!MO.isEarlyClobber())
+      DecrementDef(MO);
+    else
+      EarlyClobberDefs.push_back(&MO);
   }
-  for (const auto &U : RegUses) {
-    auto &LiveMask = LiveRegs[U.RegUnit];
-    auto PrevMask = LiveMask;
+
+  // Increment uses.
+  SmallVector<RegisterMaskPair, 8> RegUses;
+  collectVirtualRegUses(RegUses, MI, LIS, *MRI);
+  for (const RegisterMaskPair &U : RegUses) {
+    LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
+    LaneBitmask PrevMask = LiveMask;
     LiveMask |= U.LaneMask;
     CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
   }
+
+  // Point of maximum pressure: non-early-clobber defs are decremented and uses
+  // are incremented.
+  MaxPressure = max(CurPressure, MaxPressure);
+
+  // Now decrement early clobber defs.
+  for (const MachineOperand *MO : EarlyClobberDefs)
+    DecrementDef(*MO);
+
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
 }
 
@@ -562,15 +565,15 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
     } else {
       GCNUpwardRPTracker RPT(LIS);
       RPT.reset(MRI, MBBEndSlot);
-      RPT.moveMaxPressure(); // Clear max pressure.
 
       LiveOut = RPT.getLiveRegs();
       RPAtMBBEnd = RPT.getPressure();
 
       for (auto &MI : reverse(MBB)) {
+        RPT.resetMaxPressure();
         RPT.recede(MI);
         if (!MI.isDebugInstr())
-          RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+          RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
       }
 
       LiveIn = RPT.getLiveRegs();
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c750fe74749e2b3..732ed33337d24dc 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,7 +128,7 @@ class GCNRPTracker {
 
   void clearMaxPressure() { MaxPressure.clear(); }
 
-  GCNRegPressure getPressure() const { return CurPressure; }
+  const GCNRegPressure &getPressure() const { return CurPressure; }
 
   // returns MaxPressure, resetting it
   decltype(MaxPressure) moveMaxPressure() {
@@ -149,24 +149,46 @@ class GCNUpwardRPTracker : public GCNRPTracker {
 public:
   GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
 
-  // reset tracker to the point just below MI
-  // filling live regs upon this point using LIS
-  void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr);
-
   // reset tracker and set live register set to the specified value.
   void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
 
   // reset tracker at the specified slot index.
-  void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) {
-    reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_));
+  void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
+    reset(MRI, llvm::getLiveRegs(SI, LIS, MRI));
+  }
+
+  // reset tracker to the end of the MBB.
+  void reset(const MachineBasicBlock &MBB) {
+    reset(MBB.getParent()->getRegInfo(),
+          LIS.getSlotIndexes()->getMBBEndIdx(&MBB));
+  }
+
+  // reset tracker to the point just after MI (in program order).
+  void reset(const MachineInstr &MI) {
+    reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
   }
 
-  // move to the state just above the MI
+  // move to the state just before the MI (in program order).
   void recede(const MachineInstr &MI);
 
   // checks whether the tracker's state after receding MI corresponds
-  // to reported by LIS
+  // to reported by LIS.
   bool isValid() const;
+
+  // deprecated.
+  decltype(MaxPressure) moveMaxPressure() = delete;
+
+  const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
+
+  void resetMaxPressure() {
+    MaxPressure = CurPressure;
+  }
+
+  GCNRegPressure getMaxPressureAndReset() { 
+    GCNRegPressure RP = MaxPressure;
+    resetMaxPressure();
+    return RP;
+  }
 };
 
 class GCNDownwardRPTracker : public GCNRPTracker {
@@ -209,6 +231,13 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
                             const LiveIntervals &LIS,
                             const MachineRegisterInfo &MRI);
 
+LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
+                            const MachineRegisterInfo &MRI);
+
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
+                                     const LiveIntervals &LIS,
+                                     const MachineRegisterInfo &MRI);
+
 /// creates a map MachineInstr -> LiveRegSet
 /// R - range of iterators on instructions
 /// After - upon entry or exit of every instruction
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index d53050167e98bef..bb889e48aa16895 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -136,54 +136,28 @@ body:             |
 name:  upward_problem_lis_subregs_mismatch
 tracksRegLiveness: true
 body:             |
-  ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch
-  ; RPU: bb.0:
-  ; RPU-NEXT:   Live-in:
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   0     0
-  ; RPU-NEXT:   0     1      undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
-  ; RPU-NEXT:   0     1
-  ; RPU-NEXT:   0     2      undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
-  ; RPU-NEXT:   0     2
-  ; RPU-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
-  ; RPU-NEXT: bb.1:
-  ; RPU-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   0     2
-  ; RPU-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
-  ; RPU-NEXT: bb.2:
-  ; RPU-NEXT:   Live-in:  %0:000000000000000F %1:000000000000000F
-  ; RPU-NEXT:   mis LIS:  %0:0000000000000003 %1:000000000000000C
-  ; RPU-NEXT:     %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F
-  ; RPU-NEXT:     %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   0     4
-  ; RPU-NEXT:   0     4      S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
-  ; RPU-NEXT:   0     0
-  ; RPU-NEXT:   Live-out:
-  ;
-  ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch
-  ; RPD: bb.0:
-  ; RPD-NEXT:   Live-in:
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   0     0
-  ; RPD-NEXT:   0     1      undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
-  ; RPD-NEXT:   0     1
-  ; RPD-NEXT:   0     2      undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
-  ; RPD-NEXT:   0     2
-  ; RPD-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
-  ; RPD-NEXT: bb.1:
-  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   0     2
-  ; RPD-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
-  ; RPD-NEXT: bb.2:
-  ; RPD-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   0     2
-  ; RPD-NEXT:   0     2      S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
-  ; RPD-NEXT:   0     0
-  ; RPD-NEXT:   Live-out:
+  ; RP-LABEL: name: upward_problem_lis_subregs_mismatch
+  ; RP: bb.0:
+  ; RP-NEXT:   Live-in:
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     1      undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   0     2      undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RP-NEXT: bb.1:
+  ; RP-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %1:000000000000000C
+  ; RP-NEXT: bb.2:
+  ; RP-NEXT:   Live-in:  %0:0000000000000003 %1:000000000000000C
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   Live-out:
   bb.0:
     undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec
     undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec
@@ -217,13 +191,13 @@ body:             |
   ; RPU-NEXT:   0     7
   ; RPU-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
   ; RPU-NEXT:   0     8
-  ; RPU-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RPU-NEXT:   0     9      %5:vreg_64 = COPY %2:vreg_64
   ; RPU-NEXT:   0     9
   ; RPU-NEXT:   0     9      undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
   ; RPU-NEXT:   0     8
   ; RPU-NEXT:   0     8      dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
   ; RPU-NEXT:   0     7
-  ; RPU-NEXT:   0     8      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     7      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     6
   ; RPU-NEXT:   0     7      %8:vreg_64 = IMPLICIT_DEF
   ; RPU-NEXT:   0     7
@@ -247,16 +221,16 @@ body:             |
   ; RPU-NEXT:   0     20
   ; RPU-NEXT:   0     21     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
   ; RPU-NEXT:   0     21
-  ; RPU-NEXT:   0     22     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RPU-NEXT:   0     21     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
   ; RPU-NEXT:   0     20
-  ; RPU-NEXT:   0     21     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+  ; RPU-NEXT:   0     20     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
   ; RPU-NEXT:                DBG_VALUE
   ; RPU-NEXT:   0     20
   ; RPU-NEXT:   0     20     GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
   ; RPU-NEXT:   0     16
-  ; RPU-NEXT:   0     17     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     16     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     15
-  ; RPU-NEXT:   0     16     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     15     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     14
   ; RPU-NEXT:   0     14     dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:                DBG_VALUE
@@ -266,7 +240,7 @@ body:             |
   ; RPU-NEXT:   0     10
   ; RPU-NEXT:   0     10     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     10
-  ; RPU-NEXT:   0     11     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+  ; RPU-NEXT:   0     10     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
   ; RPU-NEXT:   0     9
   ; RPU-NEXT:   0     9      S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
   ; RPU-NEXT:   0     5
@@ -459,4 +433,55 @@ body:             |
     S_NOP 0, implicit %16
     S_ENDPGM 0
 ...
-
+---
+name:  test_early_clobber_trivial
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; RP-LABEL: name: test_early_clobber_trivial
+    ; RP: Live-in:
+    ; RP-NEXT: SGPR  VGPR
+    ; RP-NEXT: 0     0
+    ; RP-NEXT: 0     1      %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    ; RP-NEXT: 0     1
+    ; RP-NEXT: 0     2      early-clobber %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+    ; RP-NEXT: 0     1
+    ; RP-NEXT: 0     1      S_NOP 0, implicit %1:vgpr_32
+    ; RP-NEXT: 0     0
+    ; RP-NEXT: Live-out:
+    %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    early-clobber %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+    S_NOP 0, implicit %1
+...
+---
+name:  test_not_early_clobber_trivial
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; RPU-LABEL: name: test_not_early_clobber_trivial
+    ; RPU: Live-in:
+    ; RPU-NEXT: SGPR  VGPR
+    ; RPU-NEXT: 0     0
+    ; RPU-NEXT: 0     1      %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    ; RPU-NEXT: 0     1
+    ; RPU-NEXT: 0     1      %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+    ; RPU-NEXT: 0     1
+    ; RPU-NEXT: 0     1      S_NOP 0, implicit %1:vgpr_32
+    ; RPU-NEXT: 0     0
+    ; RPU-NEXT: Live-out:
+    ;
+    ; RPD-LABEL: name: test_not_early_clobber_trivial
+    ; RPD: Live-in:
+    ; RPD-NEXT: SGPR  VGPR
+    ; RPD-NEXT: 0     0
+    ; RPD-NEXT: 0     1      %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    ; RPD-NEXT: 0     1
+    ; RPD-NEXT: 0     2      %1:vgpr_32 = V_MOV_B32_e32 %0:vgpr_32, implicit $exec
+    ; RPD-NEXT: 0     1
+    ; RPD-NEXT: 0     1      S_NOP 0, implicit %1:vgpr_32
+    ; RPD-NEXT: 0     0
+    ; RPD-NEXT: Live-out:
+    %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
+    S_NOP 0, implicit %1
+...

>From bfc74ff6c4a0040303da3cbf56229e17c0855009 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 7 Nov 2023 21:39:33 +0100
Subject: [PATCH 2/2] comments

---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 20 ++++++++++-------
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   | 26 +++++++++--------------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f191f3f08c56c6d..6a0bc163646e1e4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -178,12 +178,11 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
       continue;
 
     Register Reg = MO.getReg();
-    auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
-      return RM.RegUnit == Reg;
-    });
-    if (I != RegMaskPairs.end())
+    if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+          return RM.RegUnit == Reg;
+        }))
       continue;
-      
+
     LaneBitmask UseMask;
     auto &LI = LIS.getInterval(Reg);
     if (!LI.hasSubRanges())
@@ -217,8 +216,7 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
     for (const auto &S : LI.subranges())
       if (S.liveAt(SI)) {
         LiveMask |= S.LaneMask;
-        assert(LiveMask < MRI.getMaxLaneMaskForVReg(LI.reg()) ||
-               LiveMask == MRI.getMaxLaneMaskForVReg(LI.reg()));
+        assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
       }
   } else if (LI.liveAt(SI)) {
     LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
@@ -257,11 +255,14 @@ void GCNRPTracker::reset(const MachineInstr &MI,
   MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNUpwardRPTracker
+
 void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
                                const LiveRegSet &LiveRegs_) {
   MRI = &MRI_;
   LiveRegs = LiveRegs_;
-  LastTrackedMI = nullptr; // TODO: LastTrackedMI isnt' used, remove?
+  LastTrackedMI = nullptr;
   MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
 }
 
@@ -319,6 +320,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// GCNDownwardRPTracker
+
 bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
                                  const LiveRegSet *LiveRegsCopy) {
   MRI = &MI.getParent()->getParent()->getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 732ed33337d24dc..754957daa4b4a91 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -130,13 +130,6 @@ class GCNRPTracker {
 
   const GCNRegPressure &getPressure() const { return CurPressure; }
 
-  // returns MaxPressure, resetting it
-  decltype(MaxPressure) moveMaxPressure() {
-    auto Res = MaxPressure;
-    MaxPressure.clear();
-    return Res;
-  }
-
   decltype(LiveRegs) moveLiveRegs() {
     return std::move(LiveRegs);
   }
@@ -175,16 +168,11 @@ class GCNUpwardRPTracker : public GCNRPTracker {
   // to reported by LIS.
   bool isValid() const;
 
-  // deprecated.
-  decltype(MaxPressure) moveMaxPressure() = delete;
-
   const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
 
-  void resetMaxPressure() {
-    MaxPressure = CurPressure;
-  }
+  void resetMaxPressure() { MaxPressure = CurPressure; }
 
-  GCNRegPressure getMaxPressureAndReset() { 
+  GCNRegPressure getMaxPressureAndReset() {
     GCNRegPressure RP = MaxPressure;
     resetMaxPressure();
     return RP;
@@ -202,6 +190,13 @@ class GCNDownwardRPTracker : public GCNRPTracker {
 
   MachineBasicBlock::const_iterator getNext() const { return NextMI; }
 
+  // Return MaxPressure and clear it.
+  decltype(MaxPressure) moveMaxPressure() {
+    auto Res = MaxPressure;
+    MaxPressure.clear();
+    return Res;
+  }
+
   // Reset tracker to the point before the MI
   // filling live regs upon this point using LIS.
   // Returns false if block is empty except debug values.
@@ -234,8 +229,7 @@ LaneBitmask getLiveLaneMask(unsigned Reg,
 LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
                             const MachineRegisterInfo &MRI);
 
-GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
-                                     const LiveIntervals &LIS,
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);
 
 /// creates a map MachineInstr -> LiveRegSet



More information about the llvm-commits mailing list