[llvm] 34167f9 - RegAlloc: Fix verifier error after failed allocation (#119690)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 21 07:11:55 PST 2025


Author: Matt Arsenault
Date: 2025-02-21T22:11:51+07:00
New Revision: 34167f99668ce4d4d6a1fb88453a8d5b56d16ed5

URL: https://github.com/llvm/llvm-project/commit/34167f99668ce4d4d6a1fb88453a8d5b56d16ed5
DIFF: https://github.com/llvm/llvm-project/commit/34167f99668ce4d4d6a1fb88453a8d5b56d16ed5.diff

LOG: RegAlloc: Fix verifier error after failed allocation (#119690)

In some cases after reporting an allocation failure, this would fail
the verifier. It picks the first allocatable register and assigns it,
but didn't update the liveness appropriately. When VirtRegRewriter
relied on the liveness to set kill flags, it would incorrectly add
kill flags if there was another overlapping kill of the virtual
register.

We can't properly assign the register to an overlapping range, so
break the liveness of the failing register (and any other interfering
registers) instead. Give the virtual register dummy liveness by
effectively deleting all the uses by setting them to undef.

The edge case not tested here which I'm worried about is if the read
of the register is a def of a subregister. I've been unable to come up
with a test where this occurs.

https://reviews.llvm.org/D122616

Added: 
    llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
    llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll

Modified: 
    llvm/lib/CodeGen/RegAllocBase.cpp
    llvm/lib/CodeGen/RegAllocBase.h
    llvm/lib/CodeGen/RegAllocBasic.cpp
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
    llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
    llvm/test/CodeGen/AMDGPU/issue48473.mir
    llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
    llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
    llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
    llvm/test/CodeGen/X86/inline-asm-assertion.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 50addcbcca065..123d5cdc5bcc1 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
   Matrix = &mat;
   MRI->freezeReservedRegs();
   RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+  FailedVRegs.clear();
 }
 
 // Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
 
       // Keep going after reporting the error.
       VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
+      FailedVRegs.insert(VirtReg->reg());
     } else if (AvailablePhysReg)
       Matrix->assign(*VirtReg, AvailablePhysReg);
 
@@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() {
   DeadRemats.clear();
 }
 
+void RegAllocBase::cleanupFailedVRegs() {
+  SmallSet<Register, 8> JunkRegs;
+
+  for (Register FailedReg : FailedVRegs) {
+    JunkRegs.insert(FailedReg);
+
+    MCRegister PhysReg = VRM->getPhys(FailedReg);
+    LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
+
+    // The liveness information for the failed register and anything interfering
+    // with the physical register we arbitrarily chose is junk and needs to be
+    // deleted.
+    for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+      LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
+      for (const LiveInterval *InterferingReg : Q.interferingVRegs())
+        JunkRegs.insert(InterferingReg->reg());
+    }
+  }
+
+  // TODO: Probably need to set undef on any physreg uses not associated with
+  // a virtual register.
+  for (Register JunkReg : JunkRegs) {
+    // We still should produce valid IR. Kill all the uses and reduce the live
+    // ranges so that we don't think it's possible to introduce kill flags
+    // later which will fail the verifier.
+    for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
+      if (MO.readsReg())
+        MO.setIsUndef(true);
+    }
+
+    LIS->shrinkToUses(&LIS->getInterval(JunkReg));
+  }
+}
+
 void RegAllocBase::enqueue(const LiveInterval *LI) {
   const Register Reg = LI->reg();
 

diff  --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 5bd52da61f2dc..1fdbab694bb0e 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
 #define LLVM_LIB_CODEGEN_REGALLOCBASE_H
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
   /// always available for the remat of all the siblings of the original reg.
   SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
+  SmallSet<Register, 2> FailedVRegs;
   RegAllocBase(const RegAllocFilterFunc F = nullptr)
       : shouldAllocateRegisterImpl(F) {}
 
@@ -104,6 +106,10 @@ class RegAllocBase {
   // rematerialization.
   virtual void postOptimization();
 
+  /// Perform cleanups on registers that failed to allocate. This hacks on the
+  /// liveness in order to avoid spurious verifier errors in later passes.
+  void cleanupFailedVRegs();
+
   // Get a temporary reference to a Spiller instance.
   virtual Spiller &spiller() = 0;
 

diff  --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 51e047b2fa3f0..d240bf916ac05 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
 
   allocatePhysRegs();
   postOptimization();
+  cleanupFailedVRegs();
 
   // Diagnostic output before rewriting
   LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");

diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index bd81d630f9d1f..8978439bf6ba3 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   if (VerifyEnabled)
     MF->verify(this, "Before post optimization", &errs());
   postOptimization();
+  cleanupFailedVRegs();
   reportStats();
 
   releaseMemory();

diff  --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 99c27fa0bc95c..443d1e9d1bdf0 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -17,8 +17,8 @@
 
 ...
 
-# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
-# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
 
 ---
 name:            foo

diff  --git a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
index 503f27edf70df..6d63a4a1cc0ab 100644
--- a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
+++ b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir
@@ -27,10 +27,10 @@
 # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
 # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
+# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
 # CHECK-NEXT: }
 # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
 # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body:             |
 # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
 # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
-# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
-# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
-# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
+# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
+# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
+# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
 # CHECK-NEXT: }
 # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
 # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0

diff  --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index e272bd3480383..ba03855d01cd9 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,8 @@
 # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
 
 # CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+
 
 ---
 name:            issue48473

diff  --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
index 388a8e804a889..06c862c7fb481 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
@@ -1,8 +1,6 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-
-; FIXME: Should pass verifier after failure.
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
 
 declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg)
 

diff  --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
index fe01728c00563..01139b6aeef77 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -1,10 +1,7 @@
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
-
-# FIXME: We should not produce a verifier error after erroring
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -filetype=null 2>&1 | FileCheck -check-prefix=ERR %s
 
 # ERR: error: inline assembly requires more registers than available
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
+# ERR-NOT: Bad machine code
 
 # This testcase cannot be compiled with the enforced register
 # budget. Previously, tryLastChanceRecoloring would assert here. It

diff  --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
new file mode 100644
index 0000000000000..b7a8624f2dc9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
+
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
+# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
+
+# This testcase must fail register allocation. It should also not
+# produce a verifier error after doing so. Previously, it would not
+# properly update the liveness for the dummy selected register. As a
+# result, VirtRegRewriter would incorrectly add kill flags which
+# combined with other uses of the physical register produced a
+# verifier error.
+
+# ERR: error: <unknown>:0:0: ran out of registers during register allocation
+
+# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
+# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+
+# GREEDY: dead renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
+# GREEDY: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
+# GREEDY: S_NOP 0, implicit undef renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# GREEDY: S_NOP 0, implicit killed renamable $vgpr20_vgpr21
+
+
+# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: SI_SPILL_V256_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
+# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+# BASIC: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
+# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
+# BASIC: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
+# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
+# BASIC: S_NOP 0, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# BASIC: renamable $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
+
+--- |
+  define void @killed_reg_after_regalloc_failure() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
+
+...
+---
+name:            killed_reg_after_regalloc_failure
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
+    S_NOP 0, implicit-def %3:vreg_64
+    S_NOP 0, implicit %0, implicit %1, implicit %2
+    S_NOP 0, implicit %3
+    S_ENDPGM 0
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
new file mode 100644
index 0000000000000..5e466a9470fc5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll
@@ -0,0 +1,30 @@
+; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
+
+; ERR: error: inline assembly requires more registers than available
+; ERR-NOT: ERROR
+; ERR-NOT: Bad machine code
+
+; This test requires respecting undef on the spill source operand when
+; expanding the pseudos to avoid all verifier errors
+
+%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
+
+define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
+  %agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
+  %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
+  %vgpr0 = extractvalue %asm.output %asm, 0
+  %vgpr1 = extractvalue %asm.output %asm, 1
+  %vgpr2 = extractvalue %asm.output %asm, 2
+  %vgpr3 = extractvalue %asm.output %asm, 3
+  %vgpr4 = extractvalue %asm.output %asm, 4
+  call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
+  call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
+  call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
+  call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
+  call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
+  call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="8,8" }

diff  --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index 8e3054cceb85b..d09d6608b216e 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -1,4 +1,4 @@
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
 
 ; This testcase fails register allocation at the same time it performs
 ; virtual register splitting (by introducing VGPR to AGPR copies). We
@@ -11,7 +11,6 @@
 ; it takes the first avialable register.
 
 ; CHECK: error: <unknown>:0:0: ran out of registers during register allocation
-; CHECK: Bad machine code: Using an undefined physical register
 define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
   %agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
   %agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0

diff  --git a/llvm/test/CodeGen/X86/inline-asm-assertion.ll b/llvm/test/CodeGen/X86/inline-asm-assertion.ll
index d5507a1c44170..c6f4093760b97 100644
--- a/llvm/test/CodeGen/X86/inline-asm-assertion.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-assertion.ll
@@ -1,5 +1,5 @@
 ; RUN: not llc -verify-machineinstrs -O0 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
+; RUN: not llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
 ; CHECK: error: inline assembly requires more registers than available
 ; CHECK: .size   main, .Lfunc_end0-main
 ; CHECK-O2: error: inline assembly requires more registers than available


        


More information about the llvm-commits mailing list