[llvm] [AMDGPU] Fix GCNUpwardRPTracker. (PR #74328)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 07:19:53 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Valery Pykhtin (vpykhtin)
<details>
<summary>Changes</summary>
1. Treat a defined register as fully live "at" the instruction and update maximum pressure accordingly. Fixes https://github.com/llvm/llvm-project/pull/73786.
2. Fix for a case when an early-clobber defined register is used at the same time on the RHS.
It doesn't make much sense, but the tracker should still work correctly. Basically, the register on the RHS becomes dead as it is clobbered by the LHS before use. The testcase should be presubmitted; please take a look at https://github.com/llvm/llvm-project/commit/328b537b9ce0cfaffeb9d4612835d5a1f8e5684f to see a diff.
---
Full diff: https://github.com/llvm/llvm-project/pull/74328.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+33-38)
- (modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+26)
- (modified) llvm/test/CodeGen/AMDGPU/regpressure_printer.mir (+111-85)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5ebf834377f2c..f771a409f4d1a 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -183,18 +183,13 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
}))
continue;
- LaneBitmask UseMask;
- auto &LI = LIS.getInterval(Reg);
- if (!LI.hasSubRanges())
- UseMask = MRI.getMaxLaneMaskForVReg(Reg);
- else {
- // For a tentative schedule LIS isn't updated yet but livemask should
- // remain the same on any schedule. Subreg defs can be reordered but they
- // all must dominate uses anyway.
- if (!InstrSI)
- InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
- UseMask = getLiveLaneMask(LI, InstrSI, MRI);
- }
+ if (!InstrSI)
+ InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+
+ // For a tentative schedule LIS isn't updated yet but livemask should
+ // remain the same on any schedule. Subreg defs can be reordered but they
+ // all must dominate uses anyway.
+ LaneBitmask UseMask = getLiveLaneMask(LIS.getInterval(Reg), InstrSI, MRI);
RegMaskPairs.emplace_back(Reg, UseMask);
}
@@ -274,48 +269,48 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto DecrementDef = [this](const MachineOperand &MO) {
+ // Kill all defs.
+ GCNRegPressure DefPressure, ECDefPressure;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+
Register Reg = MO.getReg();
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+
+ // Treat a def as fully live at the moment of definition: keep a record.
+ (MO.isEarlyClobber() ? &ECDefPressure : &DefPressure)
+ ->inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
- return;
+ continue;
LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO, *MRI);
+ LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
- };
-
- // Decrement non-early-clobber defs.
- SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
- for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual())
- continue;
- if (!MO.isEarlyClobber())
- DecrementDef(MO);
- else
- EarlyClobberDefs.push_back(&MO);
}
- // Increment uses.
+ // Update MaxPressure with defs pressure.
+ MaxPressure = max(CurPressure + DefPressure + ECDefPressure, MaxPressure);
+
+ // Make uses alive.
SmallVector<RegisterMaskPair, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
- for (const RegisterMaskPair &U : RegUses) {
- LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
+ for (auto [Reg, LaneMask] : RegUses) {
+ if (LaneMask.none())
+ continue;
+ LaneBitmask &LiveMask = LiveRegs[Reg];
LaneBitmask PrevMask = LiveMask;
- LiveMask |= U.LaneMask;
- CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
+ LiveMask |= LaneMask;
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}
- // Point of maximum pressure: non-early-clobber defs are decremented and uses
- // are incremented.
- MaxPressure = max(CurPressure, MaxPressure);
-
- // Now decrement early clobber defs.
- for (const MachineOperand *MO : EarlyClobberDefs)
- DecrementDef(*MO);
+ // Update MaxPressure with all uses alive plus early-clobber defs pressure.
+ MaxPressure = max(CurPressure + ECDefPressure, MaxPressure);
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10d795ba..4100970fe1a96 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}
+ GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] += RHS.Value[I];
+ return *this;
+ }
+
+ GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] -= RHS.Value[I];
+ return *this;
+ }
+
void dump() const;
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Sum = P1;
+ Sum += P2;
+ return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Diff = P1;
+ Diff -= P2;
+ return Diff;
+}
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index a1722c42b189f..d5b62af4b02b0 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -47,87 +47,46 @@ body: |
name: live_through_test
tracksRegLiveness: true
body: |
- ; RPU-LABEL: name: live_through_test
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: Live-out: %0:00000000000000F3
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:00000000000000F3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: Live-out: %0:00000000000000C3
- ; RPU-NEXT: Live-thr: %0:00000000000000C0
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-in: %0:00000000000000C3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: live_through_test
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: Live-out: %0:00000000000000F3
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:00000000000000F3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: Live-out: %0:00000000000000C3
- ; RPD-NEXT: Live-thr: %0:00000000000000C0
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-in: %0:00000000000000C3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: live_through_test
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: Live-out: %0:00000000000000F3
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:00000000000000F3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: Live-out: %0:00000000000000C3
+ ; RP-NEXT: Live-thr: %0:00000000000000C0
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:00000000000000C3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
@@ -223,7 +182,7 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
; RPU-NEXT: 0 12
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
+ ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
+ ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 1
; RPU-NEXT: Live-out: %0:0000000000000C00
; RPU-NEXT: Live-thr:
@@ -666,3 +625,70 @@ body: |
EXP_DONE 0, %49:vgpr_32, undef %51:vgpr_32, undef %53:vgpr_32, undef %55:vgpr_32, -1, 0, 1, implicit $exec
S_ENDPGM 0
...
+---
+name: early_clobber_def_used_on_rhs
+registers:
+ - { id: 0, class: vgpr_32 }
+body: |
+ ; RPU-LABEL: name: early_clobber_def_used_on_rhs
+ ; RPU: bb.0:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
+ ; RPU-NEXT: 0 1
+ ; RPU-NEXT: 0 1 S_NOP 0, implicit %2:vgpr_32
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: bb.1:
+ ; RPU-NEXT: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: early_clobber_def_used_on_rhs
+ ; RPD: bb.0:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 0 S_NOP 0, implicit %2:vgpr_32
+ ; RPD-NEXT: 0 -1
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: mis LIS:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: bb.1:
+ ; RPD-NEXT: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ bb.0:
+ liveins: $vgpr0
+ %0 = COPY $vgpr0
+ early-clobber %0 = COPY %0
+ S_NOP 0, implicit %0
+ bb.1:
+ liveins: $vgpr0
+ %0 = COPY $vgpr0
+ %0 = COPY $vgpr0 ; Force isSSA = false
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/74328
More information about the llvm-commits
mailing list