[llvm] [RegisterScavenging] Respect early-clobber when scavenging registers (PR #184814)
Dominik Steenken via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 03:18:08 PST 2026
https://github.com/dominik-steenken updated https://github.com/llvm/llvm-project/pull/184814
>From 7a16a2363ce484da177fc56a9b27958fab50b4f8 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Mon, 19 Jan 2026 11:42:30 +0100
Subject: [PATCH 1/4] [RegisterScavenging] Respect early-clobber when
scavenging registers
When scavenging registers backwards for virtual registers introduced
during frame index elimination, the register scavenger was ignoring
early-clobber constraints on the instruction using the scavenged
register. This could lead to assigning a virtual register to a physical
register marked as early-clobber output, violating the constraint that
early-clobber outputs cannot overlap with inputs.
Fix this by filtering the allocation order to exclude registers that overlap
with any early-clobber defs in the using instruction before calling
findSurvivorBackwards().
Add a test to validate the fix.
Fixes #172511
---
llvm/lib/CodeGen/RegisterScavenging.cpp | 26 ++++++++++-
.../SystemZ/scavenge-clobbered-reg.mir | 45 +++++++++++++++++++
2 files changed, 69 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index bcac08ba322a8..b7369dab20c0e 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -299,10 +299,32 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
const MachineBasicBlock &MBB = *To->getParent();
const MachineFunction &MF = *MBB.getParent();
+ // Obtain a list of candidate registers in allocation order of RC.
+ // If the instruction at MBBI has any early-clobber def regs, we must exclude
+ // them from the candidates, without including the whole of that instruction's
+ // constraints. We achieve this by filtering the allocation order of RC.
+ // First, determine if there are any such early-clobber def regs.
+ SmallVector<MCPhysReg> FilteredAllocationOrder;
+ SmallVector<MCPhysReg> ECDefs;
+ for (const MachineOperand &Op : MBBI->operands())
+ if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
+ ECDefs.push_back(Op.getReg());
+ if (!ECDefs.empty()) {
+ // if so, obtain the filtered version.
+ for (MCPhysReg Reg : RC.getRawAllocationOrder(MF)) {
+ // Only add Reg if it does not overlap with any element of ECDefs.
+ if (!llvm::any_of(ECDefs, [&](MCPhysReg ECReg) {
+ return TRI->regsOverlap(Reg, ECReg);
+ }))
+ FilteredAllocationOrder.push_back(Reg);
+ }
+ } else
+ FilteredAllocationOrder =
+ SmallVector<MCPhysReg>(RC.getRawAllocationOrder(MF));
+
// Find the register whose use is furthest away.
- ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
std::pair<MCPhysReg, MachineBasicBlock::iterator> P = findSurvivorBackwards(
- *MRI, std::prev(MBBI), To, LiveUnits, AllocationOrder, RestoreAfter);
+ *MRI, std::prev(MBBI), To, LiveUnits, FilteredAllocationOrder, RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
// Found an available register?
diff --git a/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir b/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir
new file mode 100644
index 0000000000000..72be469d5fa95
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir
@@ -0,0 +1,45 @@
+# RUN: llc %s -mtriple=s390x-ibm-linux -mcpu=z15 -run-pass=prologepilog -o - | FileCheck %s
+# CHECK: STG killed $r2d, $r15d, 168
+# CHECK-NOT: $r1d = LAY $r11d, 4096, $noreg
+# CHECK: $r2d = LAY $r15d, 4096
+# CHECK: $r2d = LG $r15d, 168
+--- |
+ ; ModuleID = 'repro.ll'
+ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+ target triple = "s390x-unknown-linux-gnu"
+
+ define dso_local i64 @repro(i64 noundef %0, i64 noundef %1) local_unnamed_addr {
+ %3 = alloca [16384 x i8], align 1
+ %4 = getelementptr inbounds i8, ptr %3, i64 5000
+ %5 = call i64 asm sideeffect " lg $0, $1\0A\09", "=&r,*m,~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{cc},~{memory}"(ptr nonnull elementtype(i8) %4)
+ %6 = add i64 %1, %0
+ %7 = add i64 %6, %5
+ ret i64 %7
+ }
+...
+---
+name: repro
+alignment: 16
+
+tracksRegLiveness: true
+registers: []
+liveins:
+ - { reg: '$r2d', virtual-reg: '' }
+ - { reg: '$r3d', virtual-reg: '' }
+frameInfo:
+ stackSize: 0
+ maxAlignment: 1
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 16384, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+ bb.0 (%ir-block.2):
+ liveins: $r2d, $r3d
+
+ renamable $r0d = COPY $r3d
+ INLINEASM &" lg $0, $1\0A\09", 25 /* sideeffect mayload maystore attdialect */, 1179659 /* regdef-ec:GR64Bit */, def early-clobber renamable $r1d, 262174 /* mem:m */, %stack.0, 5000, $noreg, 12 /* clobber */, implicit-def dead early-clobber $r3d, 12 /* clobber */, implicit-def dead early-clobber $r4d, 12 /* clobber */, implicit-def dead early-clobber $r5d, 12 /* clobber */, implicit-def dead early-clobber $r6d, 12 /* clobber */, implicit-def dead early-clobber $r7d, 12 /* clobber */, implicit-def dead early-clobber $r8d, 12 /* clobber */, implicit-def dead early-clobber $r9d, 12 /* clobber */, implicit-def dead early-clobber $r10d, 12 /* clobber */, implicit-def dead early-clobber $r11d, 12 /* clobber */, implicit-def dead early-clobber $r12d, 12 /* clobber */, implicit-def dead early-clobber $r13d, 12 /* clobber */, implicit-def dead early-clobber $r14d, 12 /* clobber */, implicit-def dead early-clobber $cc
+ renamable $r2d = AGR killed renamable $r2d, killed renamable $r0d, implicit-def dead $cc
+ renamable $r2d = AGR killed renamable $r2d, killed renamable $r1d, implicit-def dead $cc
+ Return implicit $r2d
+...
>From 1be210de68f9d3d74e2a422aa8446e189a6ebc05 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 10:36:13 +0100
Subject: [PATCH 2/4] formatting
---
llvm/lib/CodeGen/RegisterScavenging.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index b7369dab20c0e..36b56d123047d 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -323,8 +323,9 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
SmallVector<MCPhysReg>(RC.getRawAllocationOrder(MF));
// Find the register whose use is furthest away.
- std::pair<MCPhysReg, MachineBasicBlock::iterator> P = findSurvivorBackwards(
- *MRI, std::prev(MBBI), To, LiveUnits, FilteredAllocationOrder, RestoreAfter);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, std::prev(MBBI), To, LiveUnits,
+ FilteredAllocationOrder, RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
// Found an available register?
>From 0a55e97c0aa08a465ff5dc01b64c36753e4eb8b1 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 12:17:24 +0100
Subject: [PATCH 3/4] Avoid sentinel iterators
---
llvm/lib/CodeGen/RegisterScavenging.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index 36b56d123047d..4756d9d20b2b0 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -306,11 +306,12 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
// First, determine if there are any such early-clobber def regs.
SmallVector<MCPhysReg> FilteredAllocationOrder;
SmallVector<MCPhysReg> ECDefs;
- for (const MachineOperand &Op : MBBI->operands())
- if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
- ECDefs.push_back(Op.getReg());
+ if ((MBBI != MBB.end()) && (MBBI != MBB.begin()))
+ for (const MachineOperand &Op : MBBI->operands())
+ if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
+ ECDefs.push_back(Op.getReg());
if (!ECDefs.empty()) {
- // if so, obtain the filtered version.
+ // If so, obtain the filtered version.
for (MCPhysReg Reg : RC.getRawAllocationOrder(MF)) {
// Only add Reg if it does not overlap with any element of ECDefs.
if (!llvm::any_of(ECDefs, [&](MCPhysReg ECReg) {
>From 8c83343d1bea2a9983296b91b121cab5eae151cb Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 12:17:42 +0100
Subject: [PATCH 4/4] update AMDGPU test
---
llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index e7254eb5c3465..ce15560ac85ec 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -1151,9 +1151,9 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; FLATSCR-NEXT: s_addk_i32 s32, 0x100c
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000
+; FLATSCR-NEXT: s_add_i32 s41, s33, 0x1000
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
-; FLATSCR-NEXT: scratch_store_dword off, v0, s0
+; FLATSCR-NEXT: scratch_store_dword off, v0, s41
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber nonpreserved SGPRs
More information about the llvm-commits mailing list