[llvm] Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)" (PR #128400)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 00:28:52 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/128400

>From 3f05e477328c3fbcfd8583ff38c4b40ff39ef03e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 22 Feb 2025 10:02:04 +0700
Subject: [PATCH 1/3] Reapply "RegAlloc: Fix verifier error after failed
 allocation (#119690)"

This reverts commit 0c50054820799578be8f62b6fd2cc3fbc751c01e.

Reapply with more fixes to avoid expensive_checks failures. Make sure to
call splitSeparateComponents after shrinkToUses, and update the VirtRegMap
with the split registers. Also set undef on all physical register aliases to
the assigned register.
---
 llvm/lib/CodeGen/RegAllocBase.cpp             | 55 +++++++++++++++++
 llvm/lib/CodeGen/RegAllocBase.h               |  6 ++
 llvm/lib/CodeGen/RegAllocBasic.cpp            |  1 +
 llvm/lib/CodeGen/RegAllocGreedy.cpp           |  1 +
 .../AMDGPU/illegal-eviction-assert.mir        |  6 +-
 ...-reg-class-snippet-copy-use-after-free.mir | 16 ++---
 llvm/test/CodeGen/AMDGPU/issue48473.mir       |  2 +-
 ...ster-killed-error-after-alloc-failure0.mir | 59 +++++++++++++++++++
 ...ister-killed-error-after-alloc-failure1.ll | 30 ++++++++++
 .../remaining-virtual-register-operands.ll    |  2 +-
 10 files changed, 165 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll

diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 50addcbcca065..d17e92da0d2c5 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
   Matrix = &mat;
   MRI->freezeReservedRegs();
   RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+  FailedVRegs.clear();
 }
 
 // Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
 
       // Keep going after reporting the error.
       VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
+      FailedVRegs.insert(VirtReg->reg());
     } else if (AvailablePhysReg)
       Matrix->assign(*VirtReg, AvailablePhysReg);
 
@@ -161,6 +163,59 @@ void RegAllocBase::postOptimization() {
   DeadRemats.clear();
 }
 
+void RegAllocBase::cleanupFailedVRegs() {
+  SmallSet<Register, 8> JunkRegs;
+
+  for (Register FailedReg : FailedVRegs) {
+    JunkRegs.insert(FailedReg);
+
+    MCRegister PhysReg = VRM->getPhys(FailedReg);
+    LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
+
+    // The liveness information for the failed register and anything interfering
+    // with the physical register we arbitrarily chose is junk and needs to be
+    // deleted.
+    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+      LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
+      for (const LiveInterval *InterferingReg : Q.interferingVRegs())
+        JunkRegs.insert(InterferingReg->reg());
+    }
+
+    // The liveness of the assigned physical register is also now unreliable.
+    for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
+         ++Aliases) {
+      for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
+        if (MO.readsReg())
+          MO.setIsUndef(true);
+      }
+    }
+  }
+
+  for (Register JunkReg : JunkRegs) {
+    MCRegister PhysReg = VRM->getPhys(JunkReg);
+    // We still should produce valid IR. Kill all the uses and reduce the live
+    // ranges so that we don't think it's possible to introduce kill flags
+    // later which will fail the verifier.
+    for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
+      if (MO.readsReg())
+        MO.setIsUndef(true);
+    }
+
+    LiveInterval &JunkLI = LIS->getInterval(JunkReg);
+    if (LIS->shrinkToUses(&JunkLI)) {
+      SmallVector<LiveInterval *, 8> SplitLIs;
+      LIS->splitSeparateComponents(JunkLI, SplitLIs);
+
+      VRM->grow();
+      Register Original = VRM->getOriginal(JunkReg);
+      for (LiveInterval *SplitLI : SplitLIs) {
+        VRM->setIsSplitFromReg(SplitLI->reg(), Original);
+        VRM->assignVirt2Phys(SplitLI->reg(), PhysReg);
+      }
+    }
+  }
+}
+
 void RegAllocBase::enqueue(const LiveInterval *LI) {
   const Register Reg = LI->reg();
 
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 5bd52da61f2dc..1fdbab694bb0e 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
 #define LLVM_LIB_CODEGEN_REGALLOCBASE_H
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
+  SmallSet<Register, 2> FailedVRegs;
   RegAllocBase(const RegAllocFilterFunc F = nullptr)
       : shouldAllocateRegisterImpl(F) {}
 
@@ -104,6 +106,10 @@ class RegAllocBase {
   // rematerialization.
   virtual void postOptimization();
 
+  /// Perform cleanups on registers that failed to allocate. This hacks on the
+  /// liveness in order to avoid spurious verifier errors in later passes.
+  void cleanupFailedVRegs();
+
   // Get a temporary reference to a Spiller instance.
   virtual Spiller &spiller() = 0;
 
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 51e047b2fa3f0..d240bf916ac05 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
 
   allocatePhysRegs();
   postOptimization();
+  cleanupFailedVRegs();
 
   // Diagnostic output before rewriting
   LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6dba1bd8f47dd..f8faf766d9ec5 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2927,6 +2927,7 @@ bool RAGreedy::run(MachineFunction &mf) {
   if (VerifyEnabled)
     MF->verify(LIS, Indexes, "Before post optimization", &errs());
   postOptimization();
+  cleanupFailedVRegs();
   reportStats();
 
   releaseMemory();
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 84ade2687f4ba..e583b168c15f7 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -13,12 +13,12 @@
     ret void
   }
 
-  attributes #0 = { "amdgpu-waves-per-eu"="8,8"  }
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
 
 ...
 
-# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
-# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
 
 ---
 name:            foo
diff --git a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
index 503f27edf70df..6d63a4a1cc0ab 100644
--- a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
@@ -27,10 +27,10 @@
 # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
 # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
+# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
 # CHECK-NEXT: }
 # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
 # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body:             |
 # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
 # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
+# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
 # CHECK-NEXT: }
 # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
 # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index ada15be594891..c5cb1445adca0 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,7 @@
 # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
 
 # CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
 
 ---
 name:            issue48473
diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
new file mode 100644
index 0000000000000..e2a839ea9d24e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
+
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
+
+# This testcase must fail register allocation. It should also not
+# produce a verifier error after doing so. Previously, it would not
+# properly update the liveness for the dummy selected register. As a
+# result, VirtRegRewriter would incorrectly add kill flags which
+# combined with other uses of the physical register produced a
+# verifier error.
+
+# ERR: error: <unknown>:0:0: ran out of registers during register allocation
+
+# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
+# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+
+# GREEDY: dead $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
+# GREEDY: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
+# GREEDY: S_NOP 0, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
+# GREEDY: S_NOP 0, implicit killed undef $vgpr20_vgpr21
+
+
+# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: SI_SPILL_V256_SAVE killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+# BASIC: SI_SPILL_V64_SAVE killed undef $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
+# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
+# BASIC: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
+# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
+# BASIC: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
+
+--- |
+  define void @killed_reg_after_regalloc_failure() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+
+...
+---
+name:            killed_reg_after_regalloc_failure
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
+    S_NOP 0, implicit-def %3:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
new file mode 100644
index 0000000000000..5e466a9470fc5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
@@ -0,0 +1,30 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
+
+; ERR: error: inline assembly requires more registers than available
+; ERR-NOT: ERROR
+; ERR-NOT: Bad machine code
+
+; This test requires respecting undef on the spill source operand when
+; expanding the pseudos to avoid all verifier errors
+
+%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
+
+define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
+  %agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
+  %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
+  %vgpr0 = extractvalue %asm.output %asm, 0
+  %vgpr1 = extractvalue %asm.output %asm, 1
+  %vgpr2 = extractvalue %asm.output %asm, 2
+  %vgpr3 = extractvalue %asm.output %asm, 3
+  %vgpr4 = extractvalue %asm.output %asm, 4
+  call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
+  call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
+  call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
+  call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
+  call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index a9654be075692..9279b44edac75 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
 
 ; This testcase fails register allocation at the same time it performs
 ; virtual register splitting (by introducing VGPR to AGPR copies). We

>From c4c474923403ea3129dacddfab2489579e614f4f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Feb 2025 10:53:11 +0700
Subject: [PATCH 2/3] Move physreg handling. Not sure if necessary

---
 llvm/lib/CodeGen/RegAllocBase.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index d17e92da0d2c5..c94bb106f8dab 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -180,15 +180,6 @@ void RegAllocBase::cleanupFailedVRegs() {
       for (const LiveInterval *InterferingReg : Q.interferingVRegs())
         JunkRegs.insert(InterferingReg->reg());
     }
-
-    // The liveness of the assigned physical register is also now unreliable.
-    for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
-         ++Aliases) {
-      for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
-        if (MO.readsReg())
-          MO.setIsUndef(true);
-      }
-    }
   }
 
   for (Register JunkReg : JunkRegs) {
@@ -201,6 +192,15 @@ void RegAllocBase::cleanupFailedVRegs() {
         MO.setIsUndef(true);
     }
 
+    // The liveness of the assigned physical register is also now unreliable.
+    for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
+         ++Aliases) {
+      for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
+        if (MO.readsReg())
+          MO.setIsUndef(true);
+      }
+    }
+
     LiveInterval &JunkLI = LIS->getInterval(JunkReg);
     if (LIS->shrinkToUses(&JunkLI)) {
       SmallVector<LiveInterval *, 8> SplitLIs;

>From 753f4c9fb19281c60e3ca657edb8cc96e760a300 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Feb 2025 11:11:18 +0700
Subject: [PATCH 3/3] Remove intervals from regunits. Not sure if necessary

---
 llvm/lib/CodeGen/RegAllocBase.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index c94bb106f8dab..d66d396a15018 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -179,6 +179,7 @@ void RegAllocBase::cleanupFailedVRegs() {
       LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
       for (const LiveInterval *InterferingReg : Q.interferingVRegs())
         JunkRegs.insert(InterferingReg->reg());
+      LIS->removeRegUnit(*Units);
     }
   }
 



More information about the llvm-commits mailing list