[llvm] [RegisterScavenging] Respect early-clobber when scavenging registers (PR #184814)

Dominik Steenken via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 03:18:08 PST 2026


https://github.com/dominik-steenken updated https://github.com/llvm/llvm-project/pull/184814

>From 7a16a2363ce484da177fc56a9b27958fab50b4f8 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Mon, 19 Jan 2026 11:42:30 +0100
Subject: [PATCH 1/4] [RegisterScavenging] Respect early-clobber when
 scavenging registers

When scavenging registers backwards for virtual registers introduced
during frame index elimination, the register scavenger was ignoring
early-clobber constraints on the instruction using the scavenged
register. This could lead to assigning a virtual register to a physical
register marked as early-clobber output, violating the constraint that
early-clobber outputs cannot overlap with inputs.

Fix this by filtering the allocation order to exclude any register that
overlaps with an early-clobber def of the using instruction before
calling findSurvivorBackwards().

Add a SystemZ MIR test (scavenge-clobbered-reg.mir) to validate the fix.

Fixes #172511
---
 llvm/lib/CodeGen/RegisterScavenging.cpp       | 26 ++++++++++-
 .../SystemZ/scavenge-clobbered-reg.mir        | 45 +++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir

diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index bcac08ba322a8..b7369dab20c0e 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -299,10 +299,32 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
   const MachineBasicBlock &MBB = *To->getParent();
   const MachineFunction &MF = *MBB.getParent();
 
+  // Obtain a list of candidate registers in allocation order of RC.
+  // If the instruction at MBBI has any early-clobber def regs, we must exclude
+  // them from the candidates, without including the whole of that instruction's
+  // constraints. We achieve this by filtering the allocation order of RC.
+  // First, determine if there are any such early-clobber def regs.
+  SmallVector<MCPhysReg> FilteredAllocationOrder;
+  SmallVector<MCPhysReg> ECDefs;
+  for (const MachineOperand &Op : MBBI->operands())
+    if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
+      ECDefs.push_back(Op.getReg());
+  if (!ECDefs.empty()) {
+    // if so, obtain the filtered version.
+    for (MCPhysReg Reg : RC.getRawAllocationOrder(MF)) {
+      // Only add Reg if it does not overlap with any element of ECDefs.
+      if (!llvm::any_of(ECDefs, [&](MCPhysReg ECReg) {
+            return TRI->regsOverlap(Reg, ECReg);
+          }))
+        FilteredAllocationOrder.push_back(Reg);
+    }
+  } else
+    FilteredAllocationOrder =
+        SmallVector<MCPhysReg>(RC.getRawAllocationOrder(MF));
+
   // Find the register whose use is furthest away.
-  ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
   std::pair<MCPhysReg, MachineBasicBlock::iterator> P = findSurvivorBackwards(
-      *MRI, std::prev(MBBI), To, LiveUnits, AllocationOrder, RestoreAfter);
+      *MRI, std::prev(MBBI), To, LiveUnits, FilteredAllocationOrder, RestoreAfter);
   MCPhysReg Reg = P.first;
   MachineBasicBlock::iterator SpillBefore = P.second;
   // Found an available register?
diff --git a/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir b/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir
new file mode 100644
index 0000000000000..72be469d5fa95
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scavenge-clobbered-reg.mir
@@ -0,0 +1,45 @@
+# RUN: llc %s -mtriple=s390x-ibm-linux -mcpu=z15 -run-pass=prologepilog -o - | FileCheck %s
+# CHECK: STG killed $r2d, $r15d, 168
+# CHECK-NOT: $r1d = LAY $r11d, 4096, $noreg
+# CHECK: $r2d = LAY $r15d, 4096
+# CHECK: $r2d = LG $r15d, 168
+--- |
+  ; ModuleID = 'repro.ll'
+  target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+  target triple = "s390x-unknown-linux-gnu"
+  
+  define dso_local i64 @repro(i64 noundef %0, i64 noundef %1) local_unnamed_addr {
+    %3 = alloca [16384 x i8], align 1
+    %4 = getelementptr inbounds i8, ptr %3, i64 5000
+    %5 = call i64 asm sideeffect "  lg $0, $1\0A\09", "=&r,*m,~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{cc},~{memory}"(ptr nonnull elementtype(i8) %4)
+    %6 = add i64 %1, %0
+    %7 = add i64 %6, %5
+    ret i64 %7
+  }
+...
+---
+name:            repro
+alignment:       16
+
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$r2d', virtual-reg: '' }
+  - { reg: '$r3d', virtual-reg: '' }
+frameInfo:
+  stackSize:       0
+  maxAlignment:    1
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 16384, alignment: 1, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body:             |
+  bb.0 (%ir-block.2):
+    liveins: $r2d, $r3d
+  
+    renamable $r0d = COPY $r3d
+    INLINEASM &"  lg $0, $1\0A\09", 25 /* sideeffect mayload maystore attdialect */, 1179659 /* regdef-ec:GR64Bit */, def early-clobber renamable $r1d, 262174 /* mem:m */, %stack.0, 5000, $noreg, 12 /* clobber */, implicit-def dead early-clobber $r3d, 12 /* clobber */, implicit-def dead early-clobber $r4d, 12 /* clobber */, implicit-def dead early-clobber $r5d, 12 /* clobber */, implicit-def dead early-clobber $r6d, 12 /* clobber */, implicit-def dead early-clobber $r7d, 12 /* clobber */, implicit-def dead early-clobber $r8d, 12 /* clobber */, implicit-def dead early-clobber $r9d, 12 /* clobber */, implicit-def dead early-clobber $r10d, 12 /* clobber */, implicit-def dead early-clobber $r11d, 12 /* clobber */, implicit-def dead early-clobber $r12d, 12 /* clobber */, implicit-def dead early-clobber $r13d, 12 /* clobber */, implicit-def dead early-clobber $r14d, 12 /* clobber */, implicit-def dead early-clobber $cc
+    renamable $r2d = AGR killed renamable $r2d, killed renamable $r0d, implicit-def dead $cc
+    renamable $r2d = AGR killed renamable $r2d, killed renamable $r1d, implicit-def dead $cc
+    Return implicit $r2d
+...

>From 1be210de68f9d3d74e2a422aa8446e189a6ebc05 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 10:36:13 +0100
Subject: [PATCH 2/4] formatting

---
 llvm/lib/CodeGen/RegisterScavenging.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index b7369dab20c0e..36b56d123047d 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -323,8 +323,9 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
         SmallVector<MCPhysReg>(RC.getRawAllocationOrder(MF));
 
   // Find the register whose use is furthest away.
-  std::pair<MCPhysReg, MachineBasicBlock::iterator> P = findSurvivorBackwards(
-      *MRI, std::prev(MBBI), To, LiveUnits, FilteredAllocationOrder, RestoreAfter);
+  std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+      findSurvivorBackwards(*MRI, std::prev(MBBI), To, LiveUnits,
+                            FilteredAllocationOrder, RestoreAfter);
   MCPhysReg Reg = P.first;
   MachineBasicBlock::iterator SpillBefore = P.second;
   // Found an available register?

>From 0a55e97c0aa08a465ff5dc01b64c36753e4eb8b1 Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 12:17:24 +0100
Subject: [PATCH 3/4] Avoid sentinel iterators

---
 llvm/lib/CodeGen/RegisterScavenging.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index 36b56d123047d..4756d9d20b2b0 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -306,11 +306,12 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
   // First, determine if there are any such early-clobber def regs.
   SmallVector<MCPhysReg> FilteredAllocationOrder;
   SmallVector<MCPhysReg> ECDefs;
-  for (const MachineOperand &Op : MBBI->operands())
-    if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
-      ECDefs.push_back(Op.getReg());
+  if ((MBBI != MBB.end()) && (MBBI != MBB.begin()))
+    for (const MachineOperand &Op : MBBI->operands())
+      if (Op.isReg() && Op.isDef() && Op.isEarlyClobber())
+        ECDefs.push_back(Op.getReg());
   if (!ECDefs.empty()) {
-    // if so, obtain the filtered version.
+    // If so, obtain the filtered version.
     for (MCPhysReg Reg : RC.getRawAllocationOrder(MF)) {
       // Only add Reg if it does not overlap with any element of ECDefs.
       if (!llvm::any_of(ECDefs, [&](MCPhysReg ECReg) {

>From 8c83343d1bea2a9983296b91b121cab5eae151cb Mon Sep 17 00:00:00 2001
From: Dominik Steenken <dost at de.ibm.com>
Date: Fri, 6 Mar 2026 12:17:42 +0100
Subject: [PATCH 4/4] update AMDGPU test

---
 llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index e7254eb5c3465..ce15560ac85ec 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -1151,9 +1151,9 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
 ; FLATSCR-NEXT:    s_addk_i32 s32, 0x100c
 ; FLATSCR-NEXT:    v_writelane_b32 v40, s30, 0
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 0
-; FLATSCR-NEXT:    s_add_i32 s0, s33, 0x1000
+; FLATSCR-NEXT:    s_add_i32 s41, s33, 0x1000
 ; FLATSCR-NEXT:    v_writelane_b32 v40, s31, 1
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s0
+; FLATSCR-NEXT:    scratch_store_dword off, v0, s41
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ; clobber nonpreserved SGPRs



More information about the llvm-commits mailing list