[llvm-branch-commits] [llvm] Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)" (PR #128400)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Feb 22 21:16:28 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128400
Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)"
This reverts commit 0c50054820799578be8f62b6fd2cc3fbc751c01e.
Reapply with more fixes to avoid expensive_checks failures. Make sure to
call splitSeparateComponents after shrinkToUses, and update the VirtRegMap
with the split registers. Also set undef on all physical register aliases to
the assigned register.
Move physreg handling. Not sure if necessary
Remove intervals from regunits. Not sure if necessary
>From c8e70d9b94b799014f41264f67c3dd298c0e42e5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 22 Feb 2025 10:02:04 +0700
Subject: [PATCH 1/3] Reapply "RegAlloc: Fix verifier error after failed
allocation (#119690)"
This reverts commit 0c50054820799578be8f62b6fd2cc3fbc751c01e.
Reapply with more fixes to avoid expensive_checks failures. Make sure to
call splitSeparateComponents after shrinkToUses, and update the VirtRegMap
with the split registers. Also set undef on all physical register aliases to
the assigned register.
---
llvm/lib/CodeGen/RegAllocBase.cpp | 55 +++++++++++++++++
llvm/lib/CodeGen/RegAllocBase.h | 6 ++
llvm/lib/CodeGen/RegAllocBasic.cpp | 1 +
llvm/lib/CodeGen/RegAllocGreedy.cpp | 1 +
.../AMDGPU/illegal-eviction-assert.mir | 6 +-
...-reg-class-snippet-copy-use-after-free.mir | 16 ++---
llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +-
...ster-killed-error-after-alloc-failure0.mir | 59 +++++++++++++++++++
...ister-killed-error-after-alloc-failure1.ll | 30 ++++++++++
.../remaining-virtual-register-operands.ll | 2 +-
10 files changed, 165 insertions(+), 13 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 50addcbcca065..d17e92da0d2c5 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
Matrix = &mat;
MRI->freezeReservedRegs();
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+ FailedVRegs.clear();
}
// Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
+ FailedVRegs.insert(VirtReg->reg());
} else if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
@@ -161,6 +163,59 @@ void RegAllocBase::postOptimization() {
DeadRemats.clear();
}
+void RegAllocBase::cleanupFailedVRegs() {
+ SmallSet<Register, 8> JunkRegs;
+
+ for (Register FailedReg : FailedVRegs) {
+ JunkRegs.insert(FailedReg);
+
+ MCRegister PhysReg = VRM->getPhys(FailedReg);
+ LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
+
+ // The liveness information for the failed register and anything interfering
+ // with the physical register we arbitrarily chose is junk and needs to be
+ // deleted.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
+ for (const LiveInterval *InterferingReg : Q.interferingVRegs())
+ JunkRegs.insert(InterferingReg->reg());
+ }
+
+ // The liveness of the assigned physical register is also now unreliable.
+ for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
+ ++Aliases) {
+ for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
+ if (MO.readsReg())
+ MO.setIsUndef(true);
+ }
+ }
+ }
+
+ for (Register JunkReg : JunkRegs) {
+ MCRegister PhysReg = VRM->getPhys(JunkReg);
+ // We still should produce valid IR. Kill all the uses and reduce the live
+ // ranges so that we don't think it's possible to introduce kill flags
+ // later which will fail the verifier.
+ for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
+ if (MO.readsReg())
+ MO.setIsUndef(true);
+ }
+
+ LiveInterval &JunkLI = LIS->getInterval(JunkReg);
+ if (LIS->shrinkToUses(&JunkLI)) {
+ SmallVector<LiveInterval *, 8> SplitLIs;
+ LIS->splitSeparateComponents(JunkLI, SplitLIs);
+
+ VRM->grow();
+ Register Original = VRM->getOriginal(JunkReg);
+ for (LiveInterval *SplitLI : SplitLIs) {
+ VRM->setIsSplitFromReg(SplitLI->reg(), Original);
+ VRM->assignVirt2Phys(SplitLI->reg(), PhysReg);
+ }
+ }
+ }
+}
+
void RegAllocBase::enqueue(const LiveInterval *LI) {
const Register Reg = LI->reg();
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 5bd52da61f2dc..1fdbab694bb0e 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
/// always available for the remat of all the siblings of the original reg.
SmallPtrSet<MachineInstr *, 32> DeadRemats;
+ SmallSet<Register, 2> FailedVRegs;
RegAllocBase(const RegAllocFilterFunc F = nullptr)
: shouldAllocateRegisterImpl(F) {}
@@ -104,6 +106,10 @@ class RegAllocBase {
// rematerialization.
virtual void postOptimization();
+ /// Perform cleanups on registers that failed to allocate. This hacks on the
+ /// liveness in order to avoid spurious verifier errors in later passes.
+ void cleanupFailedVRegs();
+
// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 51e047b2fa3f0..d240bf916ac05 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
postOptimization();
+ cleanupFailedVRegs();
// Diagnostic output before rewriting
LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index f1e734dd2f933..74a363b5bc4b5 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
if (VerifyEnabled)
MF->verify(this, "Before post optimization", &errs());
postOptimization();
+ cleanupFailedVRegs();
reportStats();
releaseMemory();
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 84ade2687f4ba..e583b168c15f7 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -13,12 +13,12 @@
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+ attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
...
-# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
-# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
---
name: foo
diff --git a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
index 503f27edf70df..6d63a4a1cc0ab 100644
--- a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
@@ -27,10 +27,10 @@
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
+# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body: |
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
+# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
# CHECK-NEXT: }
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index ada15be594891..c5cb1445adca0 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,7 @@
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
# CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
---
name: issue48473
diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
new file mode 100644
index 0000000000000..e2a839ea9d24e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
+
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
+
+# This testcase must fail register allocation. It should also not
+# produce a verifier error after doing so. Previously, it would not
+# properly update the liveness for the dummy selected register. As a
+# result, VirtRegRewriter would incorrectly add kill flags which
+# combined with other uses of the physical register produced a
+# verifier error.
+
+# ERR: error: <unknown>:0:0: ran out of registers during register allocation
+
+# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
+# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+
+# GREEDY: dead $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
+# GREEDY: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
+# GREEDY: S_NOP 0, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
+# GREEDY: S_NOP 0, implicit killed undef $vgpr20_vgpr21
+
+
+# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: SI_SPILL_V256_SAVE killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+# BASIC: SI_SPILL_V64_SAVE killed undef $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
+# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
+# BASIC: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
+# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
+# BASIC: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
+
+--- |
+ define void @killed_reg_after_regalloc_failure() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+
+...
+---
+name: killed_reg_after_regalloc_failure
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
+ S_NOP 0, implicit-def %3:vreg_64
+ S_NOP 0, implicit %0, implicit %1, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
new file mode 100644
index 0000000000000..5e466a9470fc5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
@@ -0,0 +1,30 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
+
+; ERR: error: inline assembly requires more registers than available
+; ERR-NOT: ERROR
+; ERR-NOT: Bad machine code
+
+; This test requires respecting undef on the spill source operand when
+; expanding the pseudos to avoid all verifier errors
+
+%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
+
+define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
+ %agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
+ %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
+ %vgpr0 = extractvalue %asm.output %asm, 0
+ %vgpr1 = extractvalue %asm.output %asm, 1
+ %vgpr2 = extractvalue %asm.output %asm, 2
+ %vgpr3 = extractvalue %asm.output %asm, 3
+ %vgpr4 = extractvalue %asm.output %asm, 4
+ call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
+ call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
+ call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
+ call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
+ call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
+ call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
+ call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
+ ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index a9654be075692..9279b44edac75 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
; This testcase fails register allocation at the same time it performs
; virtual register splitting (by introducing VGPR to AGPR copies). We
>From 3fab6f82eabeccce1ce60123f645a54fa9074cc8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Feb 2025 10:53:11 +0700
Subject: [PATCH 2/3] Move physreg handling. Not sure if necessary
---
llvm/lib/CodeGen/RegAllocBase.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index d17e92da0d2c5..c94bb106f8dab 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -180,15 +180,6 @@ void RegAllocBase::cleanupFailedVRegs() {
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
JunkRegs.insert(InterferingReg->reg());
}
-
- // The liveness of the assigned physical register is also now unreliable.
- for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
- ++Aliases) {
- for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
- if (MO.readsReg())
- MO.setIsUndef(true);
- }
- }
}
for (Register JunkReg : JunkRegs) {
@@ -201,6 +192,15 @@ void RegAllocBase::cleanupFailedVRegs() {
MO.setIsUndef(true);
}
+ // The liveness of the assigned physical register is also now unreliable.
+ for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
+ ++Aliases) {
+ for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
+ if (MO.readsReg())
+ MO.setIsUndef(true);
+ }
+ }
+
LiveInterval &JunkLI = LIS->getInterval(JunkReg);
if (LIS->shrinkToUses(&JunkLI)) {
SmallVector<LiveInterval *, 8> SplitLIs;
>From a8b8cafcb1c5e252e23a1626a44eed4e4ef89316 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Feb 2025 11:11:18 +0700
Subject: [PATCH 3/3] Remove intervals from regunits. Not sure if necessary
---
llvm/lib/CodeGen/RegAllocBase.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index c94bb106f8dab..d66d396a15018 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -179,6 +179,7 @@ void RegAllocBase::cleanupFailedVRegs() {
LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
JunkRegs.insert(InterferingReg->reg());
+ LIS->removeRegUnit(*Units);
}
}
More information about the llvm-branch-commits
mailing list