[llvm] [AMDGPU] Fix GCNUpwardRPTracker: max register pressure on defs. (PR #74422)

Valery Pykhtin via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 05:02:24 PST 2023


https://github.com/vpykhtin updated https://github.com/llvm/llvm-project/pull/74422

From 822ba7ab3d4e3d1698a8a46094327f83823e6ad6 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Tue, 5 Dec 2023 07:36:40 +0100
Subject: [PATCH] [AMDGPU] Fix GCNUpwardRPTracker: max pressure on defs.

---
 llvm/lib/Target/AMDGPU/GCNRegPressure.cpp     |  50 ++---
 llvm/lib/Target/AMDGPU/GCNRegPressure.h       |  26 +++
 .../CodeGen/AMDGPU/regpressure_printer.mir    | 171 ++++++------------
 3 files changed, 112 insertions(+), 135 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5ebf834377f2c..fd8f0bebd3bec 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
   if (MI.isDebugInstr())
     return;
 
-  auto DecrementDef = [this](const MachineOperand &MO) {
+  // Kill all defs.
+  GCNRegPressure DefPressure, ECDefPressure;
+  bool HasECDefs = false;
+  for (const MachineOperand &MO : MI.all_defs()) {
+    if (!MO.getReg().isVirtual())
+      continue;
+
     Register Reg = MO.getReg();
+    LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+
+    // Treat a def as fully live at the moment of definition: keep a record.
+    if (MO.isEarlyClobber()) {
+      ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+      HasECDefs = true;
+    } else
+      DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+
     auto I = LiveRegs.find(Reg);
     if (I == LiveRegs.end())
-      return;
+      continue;
 
     LaneBitmask &LiveMask = I->second;
     LaneBitmask PrevMask = LiveMask;
-    LiveMask &= ~getDefRegMask(MO, *MRI);
+    LiveMask &= ~DefMask;
     CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
     if (LiveMask.none())
       LiveRegs.erase(I);
-  };
-
-  // Decrement non-early-clobber defs.
-  SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
-  for (const MachineOperand &MO : MI.all_defs()) {
-    if (!MO.getReg().isVirtual())
-      continue;
-    if (!MO.isEarlyClobber())
-      DecrementDef(MO);
-    else
-      EarlyClobberDefs.push_back(&MO);
   }
 
-  // Increment uses.
+  // Update MaxPressure with defs pressure.
+  DefPressure += CurPressure;
+  if (HasECDefs)
+    DefPressure += ECDefPressure;
+  MaxPressure = max(DefPressure, MaxPressure);
+
+  // Make uses alive.
   SmallVector<RegisterMaskPair, 8> RegUses;
   collectVirtualRegUses(RegUses, MI, LIS, *MRI);
   for (const RegisterMaskPair &U : RegUses) {
@@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
     CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
   }
 
-  // Point of maximum pressure: non-early-clobber defs are decremented and uses
-  // are incremented.
-  MaxPressure = max(CurPressure, MaxPressure);
-
-  // Now decrement early clobber defs.
-  for (const MachineOperand *MO : EarlyClobberDefs)
-    DecrementDef(*MO);
+  // Update MaxPressure with uses plus early-clobber defs pressure.
+  MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+                          : max(CurPressure, MaxPressure);
 
   assert(CurPressure == getRegPressure(*MRI, LiveRegs));
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10d795ba..4100970fe1a96 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
     return !(*this == O);
   }
 
+  GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+    for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+      Value[I] += RHS.Value[I];
+    return *this;
+  }
+
+  GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+    for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+      Value[I] -= RHS.Value[I];
+    return *this;
+  }
+
   void dump() const;
 
 private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
   return Res;
 }
 
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+                                const GCNRegPressure &P2) {
+  GCNRegPressure Sum = P1;
+  Sum += P2;
+  return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+                                const GCNRegPressure &P2) {
+  GCNRegPressure Diff = P1;
+  Diff -= P2;
+  return Diff;
+}
+
 class GCNRPTracker {
 public:
   using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index 83e85ccf7f8f1..f0c5ba489ef3d 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -47,87 +47,46 @@ body:             |
 name:  live_through_test
 tracksRegLiveness: true
 body:             |
-  ; RPU-LABEL: name: live_through_test
-  ; RPU: bb.0:
-  ; RPU-NEXT:   Live-in:
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   0     0
-  ; RPU-NEXT:   3     0      %0:sgpr_128 = IMPLICIT_DEF
-  ; RPU-NEXT:   3     0
-  ; RPU-NEXT:   Live-out: %0:00000000000000F3
-  ; RPU-NEXT:   Live-thr:
-  ; RPU-NEXT:   0     0
-  ; RPU-NEXT: bb.1:
-  ; RPU-NEXT:   Live-in:  %0:00000000000000F3
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   3     0
-  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub0:sgpr_128
-  ; RPU-NEXT:   2     0
-  ; RPU-NEXT:   3     0      %0.sub0:sgpr_128 = IMPLICIT_DEF
-  ; RPU-NEXT:   3     0
-  ; RPU-NEXT:   3     0      %0.sub1:sgpr_128 = IMPLICIT_DEF
-  ; RPU-NEXT:   3     0
-  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
-  ; RPU-NEXT:   2     0
-  ; RPU-NEXT:   3     0      %0.sub2:sgpr_128 = IMPLICIT_DEF
-  ; RPU-NEXT:   3     0
-  ; RPU-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
-  ; RPU-NEXT:   2     0
-  ; RPU-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128
-  ; RPU-NEXT:   2     0
-  ; RPU-NEXT:   Live-out: %0:00000000000000C3
-  ; RPU-NEXT:   Live-thr: %0:00000000000000C0
-  ; RPU-NEXT:   1     0
-  ; RPU-NEXT: bb.2:
-  ; RPU-NEXT:   Live-in:  %0:00000000000000C3
-  ; RPU-NEXT:   SGPR  VGPR
-  ; RPU-NEXT:   2     0
-  ; RPU-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
-  ; RPU-NEXT:   0     0
-  ; RPU-NEXT:   Live-out:
-  ; RPU-NEXT:   Live-thr:
-  ; RPU-NEXT:   0     0
-  ;
-  ; RPD-LABEL: name: live_through_test
-  ; RPD: bb.0:
-  ; RPD-NEXT:   Live-in:
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   0     0
-  ; RPD-NEXT:   4     0      %0:sgpr_128 = IMPLICIT_DEF
-  ; RPD-NEXT:   3     0
-  ; RPD-NEXT:   Live-out: %0:00000000000000F3
-  ; RPD-NEXT:   Live-thr:
-  ; RPD-NEXT:   0     0
-  ; RPD-NEXT: bb.1:
-  ; RPD-NEXT:   Live-in:  %0:00000000000000F3
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   3     0
-  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub0:sgpr_128
-  ; RPD-NEXT:   2     0
-  ; RPD-NEXT:   3     0      %0.sub0:sgpr_128 = IMPLICIT_DEF
-  ; RPD-NEXT:   3     0
-  ; RPD-NEXT:   4     0      %0.sub1:sgpr_128 = IMPLICIT_DEF
-  ; RPD-NEXT:   3     0
-  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
-  ; RPD-NEXT:   2     0
-  ; RPD-NEXT:   3     0      %0.sub2:sgpr_128 = IMPLICIT_DEF
-  ; RPD-NEXT:   3     0
-  ; RPD-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
-  ; RPD-NEXT:   2     0
-  ; RPD-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128
-  ; RPD-NEXT:   2     0
-  ; RPD-NEXT:   Live-out: %0:00000000000000C3
-  ; RPD-NEXT:   Live-thr: %0:00000000000000C0
-  ; RPD-NEXT:   1     0
-  ; RPD-NEXT: bb.2:
-  ; RPD-NEXT:   Live-in:  %0:00000000000000C3
-  ; RPD-NEXT:   SGPR  VGPR
-  ; RPD-NEXT:   2     0
-  ; RPD-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
-  ; RPD-NEXT:   0     0
-  ; RPD-NEXT:   Live-out:
-  ; RPD-NEXT:   Live-thr:
-  ; RPD-NEXT:   0     0
+  ; RP-LABEL: name: live_through_test
+  ; RP: bb.0:
+  ; RP-NEXT:   Live-in:
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   4     0      %0:sgpr_128 = IMPLICIT_DEF
+  ; RP-NEXT:   3     0
+  ; RP-NEXT:   Live-out: %0:00000000000000F3
+  ; RP-NEXT:   Live-thr:
+  ; RP-NEXT:   0     0
+  ; RP-NEXT: bb.1:
+  ; RP-NEXT:   Live-in:  %0:00000000000000F3
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   3     0
+  ; RP-NEXT:   3     0      S_NOP 0, implicit %0.sub0:sgpr_128
+  ; RP-NEXT:   2     0
+  ; RP-NEXT:   3     0      %0.sub0:sgpr_128 = IMPLICIT_DEF
+  ; RP-NEXT:   3     0
+  ; RP-NEXT:   4     0      %0.sub1:sgpr_128 = IMPLICIT_DEF
+  ; RP-NEXT:   3     0
+  ; RP-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RP-NEXT:   2     0
+  ; RP-NEXT:   3     0      %0.sub2:sgpr_128 = IMPLICIT_DEF
+  ; RP-NEXT:   3     0
+  ; RP-NEXT:   3     0      S_NOP 0, implicit %0.sub2:sgpr_128
+  ; RP-NEXT:   2     0
+  ; RP-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128
+  ; RP-NEXT:   2     0
+  ; RP-NEXT:   Live-out: %0:00000000000000C3
+  ; RP-NEXT:   Live-thr: %0:00000000000000C0
+  ; RP-NEXT:   1     0
+  ; RP-NEXT: bb.2:
+  ; RP-NEXT:   Live-in:  %0:00000000000000C3
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   2     0
+  ; RP-NEXT:   2     0      S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   Live-out:
+  ; RP-NEXT:   Live-thr:
+  ; RP-NEXT:   0     0
   bb.0:
     %0:sgpr_128 = IMPLICIT_DEF
   bb.1:
@@ -223,7 +182,7 @@ body:             |
   ; RPU-NEXT:   0     7
   ; RPU-NEXT:   0     7      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     6
-  ; RPU-NEXT:   0     7      %8:vreg_64 = IMPLICIT_DEF
+  ; RPU-NEXT:   0     8      %8:vreg_64 = IMPLICIT_DEF
   ; RPU-NEXT:   0     7
   ; RPU-NEXT:   0     9      %9:vreg_64 = IMPLICIT_DEF
   ; RPU-NEXT:   0     9
@@ -262,7 +221,7 @@ body:             |
   ; RPU-NEXT:   0     12
   ; RPU-NEXT:   0     12     dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     10
-  ; RPU-NEXT:   0     10     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+  ; RPU-NEXT:   0     11     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
   ; RPU-NEXT:   0     10
   ; RPU-NEXT:   0     10     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
   ; RPU-NEXT:   0     9
@@ -550,7 +509,7 @@ body: |
   ; RPU-NEXT:   0     0
   ; RPU-NEXT:   0     0      $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
   ; RPU-NEXT:   0     0
-  ; RPU-NEXT:   0     0      undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
+  ; RPU-NEXT:   0     1      undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
   ; RPU-NEXT:   0     0
   ; RPU-NEXT:   0     0      S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
   ; RPU-NEXT:   0     0
@@ -569,7 +528,7 @@ body: |
   ; RPU-NEXT:   0     1
   ; RPU-NEXT:   0     1      $m0 = S_MOV_B32 killed $sgpr0
   ; RPU-NEXT:   0     1
-  ; RPU-NEXT:   0     1      %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
+  ; RPU-NEXT:   0     16     %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
   ; RPU-NEXT:   0     1
   ; RPU-NEXT:   Live-out: %0:0000000000000C00
   ; RPU-NEXT:   Live-thr:
@@ -709,33 +668,19 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $sgpr0_sgpr1_sgpr2_sgpr3
-    ; RPU-LABEL: name: test_partially_used_early_clobber_def
-    ; RPU: Live-in:
-    ; RPU-NEXT: SGPR  VGPR
-    ; RPU-NEXT: 0     0
-    ; RPU-NEXT: 4     0      %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; RPU-NEXT: 4     0
-    ; RPU-NEXT: 5     0      early-clobber %1:sgpr_128 = COPY %0:sgpr_128
-    ; RPU-NEXT: 1     0
-    ; RPU-NEXT: 1     0      S_NOP 0, implicit %1.sub1:sgpr_128
-    ; RPU-NEXT: 0     0
-    ; RPU-NEXT: Live-out:
-    ; RPU-NEXT: Live-thr:
-    ; RPU-NEXT: 0     0
-    ;
-    ; RPD-LABEL: name: test_partially_used_early_clobber_def
-    ; RPD: Live-in:
-    ; RPD-NEXT: SGPR  VGPR
-    ; RPD-NEXT: 0     0
-    ; RPD-NEXT: 4     0      %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; RPD-NEXT: 4     0
-    ; RPD-NEXT: 8     0      early-clobber %1:sgpr_128 = COPY %0:sgpr_128
-    ; RPD-NEXT: 1     0
-    ; RPD-NEXT: 1     0      S_NOP 0, implicit %1.sub1:sgpr_128
-    ; RPD-NEXT: 0     0
-    ; RPD-NEXT: Live-out:
-    ; RPD-NEXT: Live-thr:
-    ; RPD-NEXT: 0     0
+    ; RP-LABEL: name: test_partially_used_early_clobber_def
+    ; RP: Live-in:
+    ; RP-NEXT: SGPR  VGPR
+    ; RP-NEXT: 0     0
+    ; RP-NEXT: 4     0      %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; RP-NEXT: 4     0
+    ; RP-NEXT: 8     0      early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+    ; RP-NEXT: 1     0
+    ; RP-NEXT: 1     0      S_NOP 0, implicit %1.sub1:sgpr_128
+    ; RP-NEXT: 0     0
+    ; RP-NEXT: Live-out:
+    ; RP-NEXT: Live-thr:
+    ; RP-NEXT: 0     0
     %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     early-clobber %1:sgpr_128 = COPY %0:sgpr_128
     S_NOP 0, implicit %1.sub1
@@ -752,7 +697,7 @@ body:             |
     ; RPU-NEXT: 0     0
     ; RPU-NEXT: 4     0      %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     ; RPU-NEXT: 4     0
-    ; RPU-NEXT: 7     0      %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+    ; RPU-NEXT: 16    0      %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
     ; RPU-NEXT: 6     0
     ; RPU-NEXT: 6     0      S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
     ; RPU-NEXT: 0     0



More information about the llvm-commits mailing list