[llvm] [AMDGPU] Constrain use LiveMask by the operand's LaneMask for RP calculation. (PR #111452)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 08:23:23 PDT 2024


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/111452

>From 03fb12224c77f71fb68dcd3febcf42c94cc04441 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 7 Oct 2024 20:55:13 +0200
Subject: [PATCH 1/2] [AMDGPU] Constrain use LiveMask by the operand's LaneMask
 for RP calculation.

CoAuthor: Valery Pykhtin <Valery.Pykhtin at amd.com>

For speculative RP queries, recede may calculate inaccurate masks for subreg uses. Previously, the calcaultion would look at any live lane for the use at the position of the MI in the LIS. This also adds lanes for any subregs which are live at but not used by the instruction. By constraining against the getSubRegIndexLaneMask for the operand's subreg, we are sure to not pick up on these extra lanes.

For current clients of recede, this is not an issue. This is because 1. the current clients do not violate the program order in the LIS, and 2. the change to RP is based on the difference between previous mask and new mask. Since current clients are not exposed to this issue, this patch is sort of NFC.

Change-Id: I63c788785b104ed88297d06e9b2b0dbc0bf6d040
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 52 ++++++++++++++---------
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   |  9 ++--
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index cb0624f11592d2..29cf9fcc715bdd 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -259,7 +259,8 @@ static void
 collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
                       const MachineInstr &MI, const LiveIntervals &LIS,
                       const MachineRegisterInfo &MRI) {
-  SlotIndex InstrSI;
+
+  auto &TRI = *MRI.getTargetRegisterInfo();
   for (const auto &MO : MI.operands()) {
     if (!MO.isReg() || !MO.getReg().isVirtual())
       continue;
@@ -267,25 +268,31 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
       continue;
 
     Register Reg = MO.getReg();
-    if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
-          return RM.RegUnit == Reg;
-        }))
-      continue;
+    auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
+      return RM.RegUnit == Reg;
+    });
+
+    auto &P = I == RegMaskPairs.end()
+                  ? RegMaskPairs.emplace_back(Reg, LaneBitmask::getNone())
+                  : *I;
 
-    LaneBitmask UseMask;
-    auto &LI = LIS.getInterval(Reg);
+    P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(MO.getSubReg())
+                                 : MRI.getMaxLaneMaskForVReg(Reg);
+  }
+
+  SlotIndex InstrSI;
+  for (auto &P : RegMaskPairs) {
+    auto &LI = LIS.getInterval(P.RegUnit);
     if (!LI.hasSubRanges())
-      UseMask = MRI.getMaxLaneMaskForVReg(Reg);
-    else {
-      // For a tentative schedule LIS isn't updated yet but livemask should
-      // remain the same on any schedule. Subreg defs can be reordered but they
-      // all must dominate uses anyway.
-      if (!InstrSI)
-        InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
-      UseMask = getLiveLaneMask(LI, InstrSI, MRI);
-    }
+      continue;
+
+    // For a tentative schedule LIS isn't updated yet but livemask should
+    // remain the same on any schedule. Subreg defs can be reordered but they
+    // all must dominate uses anyway.
+    if (!InstrSI)
+      InstrSI = LIS.getInstructionIndex(MI).getBaseIndex();
 
-    RegMaskPairs.emplace_back(Reg, UseMask);
+    P.LaneMask = getLiveLaneMask(LI, InstrSI, MRI, P.LaneMask);
   }
 }
 
@@ -294,22 +301,25 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
 
 LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
                                   const LiveIntervals &LIS,
-                                  const MachineRegisterInfo &MRI) {
-  return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
+                                  const MachineRegisterInfo &MRI,
+                                  LaneBitmask Mask) {
+  return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, Mask);
 }
 
 LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
-                                  const MachineRegisterInfo &MRI) {
+                                  const MachineRegisterInfo &MRI,
+                                  LaneBitmask Mask) {
   LaneBitmask LiveMask;
   if (LI.hasSubRanges()) {
     for (const auto &S : LI.subranges())
-      if (S.liveAt(SI)) {
+      if ((S.LaneMask & Mask).any() && S.liveAt(SI)) {
         LiveMask |= S.LaneMask;
         assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
       }
   } else if (LI.liveAt(SI)) {
     LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
   }
+  LiveMask &= Mask;
   return LiveMask;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 54dc1972d27619..e71d6bb4becf8c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -265,13 +265,14 @@ class GCNDownwardRPTracker : public GCNRPTracker {
                const LiveRegSet *LiveRegsCopy = nullptr);
 };
 
-LaneBitmask getLiveLaneMask(unsigned Reg,
-                            SlotIndex SI,
+LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
                             const LiveIntervals &LIS,
-                            const MachineRegisterInfo &MRI);
+                            const MachineRegisterInfo &MRI,
+                            LaneBitmask Mask = LaneBitmask::getAll());
 
 LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
-                            const MachineRegisterInfo &MRI);
+                            const MachineRegisterInfo &MRI,
+                            LaneBitmask Mask = LaneBitmask::getAll());
 
 GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);

>From da204759d0532e766d6706831febc0b406fd74c3 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 8 Oct 2024 08:22:42 -0700
Subject: [PATCH 2/2] Document the new signature

Change-Id: Ibf989b53555fe686693ca01d2e63f64aed58ded7
---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 10 +++++-----
 llvm/lib/Target/AMDGPU/GCNRegPressure.h   |  8 ++++++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 29cf9fcc715bdd..b1ad6a2e897029 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -302,24 +302,24 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
 LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
                                   const LiveIntervals &LIS,
                                   const MachineRegisterInfo &MRI,
-                                  LaneBitmask Mask) {
-  return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, Mask);
+                                  LaneBitmask MaxLaneMask) {
+  return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, MaxLaneMask);
 }
 
 LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
                                   const MachineRegisterInfo &MRI,
-                                  LaneBitmask Mask) {
+                                  LaneBitmask MaxLaneMask) {
   LaneBitmask LiveMask;
   if (LI.hasSubRanges()) {
     for (const auto &S : LI.subranges())
-      if ((S.LaneMask & Mask).any() && S.liveAt(SI)) {
+      if ((S.LaneMask & MaxLaneMask).any() && S.liveAt(SI)) {
         LiveMask |= S.LaneMask;
         assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
       }
   } else if (LI.liveAt(SI)) {
     LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
   }
-  LiveMask &= Mask;
+  LiveMask &= MaxLaneMask;
   return LiveMask;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e71d6bb4becf8c..d9b3633044f7a2 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -265,14 +265,18 @@ class GCNDownwardRPTracker : public GCNRPTracker {
                const LiveRegSet *LiveRegsCopy = nullptr);
 };
 
+/// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the
+/// active lanes of \p MaxLaneMask will be set in the return value. This is
+/// used, for example, to limit the live lanes to a specific subreg when
+/// calculating use masks.
 LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
                             const LiveIntervals &LIS,
                             const MachineRegisterInfo &MRI,
-                            LaneBitmask Mask = LaneBitmask::getAll());
+                            LaneBitmask MaxLaneMask = LaneBitmask::getAll());
 
 LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
                             const MachineRegisterInfo &MRI,
-                            LaneBitmask Mask = LaneBitmask::getAll());
+                            LaneBitmask MaxLaneMask = LaneBitmask::getAll());
 
 GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
                                      const MachineRegisterInfo &MRI);



More information about the llvm-commits mailing list