[llvm] [WIP][CodeGen] Encode liveness for COPY instructions after virtRegRewriter pass. (PR #151123)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 04:26:30 PDT 2025
https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/151123
>From 8cb9a5e039fbbe1735e8ca8224363b13e6a431a8 Mon Sep 17 00:00:00 2001
From: vikashgu <Vikash.Gupta at amd.com>
Date: Fri, 25 Jul 2025 08:37:53 +0000
Subject: [PATCH 1/2] [CodeGen]Encode liveness for copy used MO after
virtRegRewriter.
For the Greedy RA, the virtRegRewriter pass is the last place
that holds liveness info, even at the subregister level. So that
information can now be extracted and encoded on the COPY instruction.
This information can later be used to identify partially live
registers precisely, assuming the liveness information used is
not invalidated by any kind of IR mutation later.
---
llvm/include/llvm/Target/Target.td | 2 +-
llvm/lib/CodeGen/VirtRegMap.cpp | 89 ++++++++++++++++++-
.../greedy-alloc-fail-sgpr1024-spill.mir | 4 +-
.../ran-out-of-sgprs-allocation-failure.mir | 4 +-
4 files changed, 93 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 4c83f8a580aa0..1f125c2cf87de 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1323,7 +1323,7 @@ def REG_SEQUENCE : StandardPseudoInstruction {
}
def COPY : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins unknown:$src);
+ let InOperandList = (ins unknown:$src, variable_ops);
let AsmString = "";
let hasSideEffects = false;
let isAsCheapAsAMove = true;
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 99ba893d6f096..227c0ae813934 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -213,6 +213,8 @@ class VirtRegRewriter {
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
+ uint64_t calcLiveRegUnitMask(const MachineOperand &MO,
+ MCRegister PhysReg) const;
void addLiveInsForSubRanges(const LiveInterval &LI, MCRegister PhysReg) const;
void handleIdentityCopy(MachineInstr &MI);
void expandCopyBundle(MachineInstr &MI) const;
@@ -474,6 +476,77 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}
+// Return LaneBitmask value as unint64_t for PhysReg assigned to MO,
+// representing its live register units at its parent MI. In case of undef or
+// fully live MO, return 0u.
+uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
+ MCRegister PhysReg) const {
+ Register Reg = MO.getReg();
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = SubRegIdx
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
+ : LaneBitmask::getNone());
+
+ LaneBitmask LiveRegUnitMask;
+ DenseSet<unsigned> LiveRegUnits;
+
+ // dbgs() << "\n********** " << printReg(Reg, TRI) << "[ " <<
+ // printReg(PhysReg, TRI) << " ]" << " **********\n";
+
+ if (MO.isUndef())
+ return 0u;
+
+ assert(LI.liveAt(MIIndex) &&
+ "Reads of completely dead register should be marked undef already");
+
+ if (LI.hasSubRanges()) {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & UseMask & Mask).any() && S.liveAt(MIIndex)) {
+ LiveRegUnits.insert(Unit);
+ }
+ }
+ }
+ } else {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ LaneBitmask Mask = (*Units).second;
+
+ if (UnitRange.liveAt(MIIndex) && (UseMask & Mask).any())
+ LiveRegUnits.insert(Unit);
+ }
+ }
+
+ // Consider the exact subregister & create new UseMask as per the RC for it.
+ if (SubRegIdx != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+ UseMask = (TRI->getMinimalPhysRegClass(PhysReg))->getLaneMask();
+ }
+
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ if (LiveRegUnits.count(Unit)) {
+ // dbgs() << "LIVE DEF UNIT : " << printRegUnit(Unit, TRI) << '\n';
+ LiveRegUnitMask |= Mask;
+ }
+ }
+
+ // dbgs() << "UseMask : " << PrintLaneMask(UseMask) << '\n';
+ // dbgs() << "LiveRegUnitMask : " << PrintLaneMask(LiveRegUnitMask) << '\n';
+ if (UseMask == LiveRegUnitMask)
+ return 0u;
+
+ return LiveRegUnitMask.getAsInteger();
+}
+
void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
if (!MI.isIdentityCopy())
return;
@@ -495,7 +568,11 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
- if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
+
+ // Skip a COPY with exactly 3 operands whose third operand is the Mask, as
+ // it is the same as a COPY with no additional liveness information.
+ if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 3 ||
+ (MI.getNumOperands() == 3 && !MI.getOperand(2).isImm())) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
@@ -641,11 +718,14 @@ void VirtRegRewriter::rewrite() {
SmallVector<Register, 8> SuperDeads;
SmallVector<Register, 8> SuperDefs;
SmallVector<Register, 8> SuperKills;
+ uint64_t Mask;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ // reset for each MI.
+ Mask = 0u;
for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
@@ -663,6 +743,9 @@ void VirtRegRewriter::rewrite() {
RewriteRegs.insert(PhysReg);
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+ if (MO.isUse() && MI.isCopy())
+ Mask = calcLiveRegUnitMask(MO, PhysReg);
+
// Preserve semantics of sub-register operands.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
@@ -739,6 +822,10 @@ void VirtRegRewriter::rewrite() {
MO.setIsRenamable(true);
}
+ // Add LaneBitmask as MO_Imm
+ if (MI.isCopy() && Mask)
+ MI.addOperand(*MF, MachineOperand::CreateImm(Mask));
+
// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
index da1175c02e94a..965c31970404f 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -86,7 +86,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, 4398046511103
; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -117,7 +117,7 @@ body: |
; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68
; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68
; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68
- ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
+ ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, 17592186044415, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index 4a0bb6ceccd3f..09526ea5ac878 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -50,7 +50,7 @@ body: |
; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, 12884901888, implicit $exec
; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240
; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
@@ -221,7 +221,7 @@ body: |
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83, 3
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49
; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85
>From 3352e3495a6f1e4c85888075227bb259cca90646 Mon Sep 17 00:00:00 2001
From: Vikash Gupta <Vikash.Gupta at amd.com>
Date: Mon, 4 Aug 2025 16:56:21 +0530
Subject: [PATCH 2/2] Update llvm/lib/CodeGen/VirtRegMap.cpp
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/CodeGen/VirtRegMap.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 227c0ae813934..611eada7fbd0c 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -476,7 +476,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}
-// Return LaneBitmask value as unint64_t for PhysReg assigned to MO,
+// Return LaneBitmask value as uint64_t for PhysReg assigned to MO,
// representing its live register units at its parent MI. In case of undef or
// fully live MO, return 0u.
uint64_t VirtRegRewriter::calcLiveRegUnitMask(const MachineOperand &MO,
More information about the llvm-commits
mailing list