[llvm] RegAllocFast: Fix verifier errors after assigning to reserved registers (PR #128155)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 01:48:23 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128155
None
>From be2182653eb5cbcb412e6d7d23eb1393ab315fb5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 12 Dec 2024 10:53:41 +0900
Subject: [PATCH] RegAllocFast: Fix verifier errors after assigning to reserved
registers
---
llvm/lib/CodeGen/RegAllocFast.cpp | 42 +++++++++++++------
llvm/lib/CodeGen/VirtRegMap.cpp | 16 +++++--
.../AMDGPU/illegal-eviction-assert.mir | 4 +-
llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +-
...ut-of-registers-error-all-regs-reserved.ll | 8 ++--
...lloc-failure-overlapping-insert-assert.mir | 6 +--
.../remaining-virtual-register-operands.ll | 11 +++--
llvm/test/CodeGen/X86/inline-asm-assertion.ll | 3 +-
8 files changed, 55 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 14128dafbe4ee..2cfd5bf1377eb 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -206,6 +206,7 @@ class RegAllocFastImpl {
bool Error = false; ///< Could not allocate.
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
+ explicit LiveReg() {}
unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }
};
@@ -216,7 +217,7 @@ class RegAllocFastImpl {
LiveRegMap LiveVirtRegs;
/// Stores assigned virtual registers present in the bundle MI.
- DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+ DenseMap<Register, LiveReg> BundleVirtRegsMap;
DenseMap<unsigned, SmallVector<MachineOperand *, 2>> LiveDbgValueMap;
/// List of DBG_VALUE that we encountered without the vreg being assigned
@@ -374,7 +375,8 @@ class RegAllocFastImpl {
SmallSet<Register, 2> &PrologLiveIns) const;
void reloadAtBegin(MachineBasicBlock &MBB);
- bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO,
+ const LiveReg &Assignment);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
@@ -1005,7 +1007,8 @@ void RegAllocFastImpl::allocVirtRegUndef(MachineOperand &MO) {
MO.setSubReg(0);
}
MO.setReg(PhysReg);
- MO.setIsRenamable(true);
+ if (!LRI->Error)
+ MO.setIsRenamable(true);
}
/// Variation of defineVirtReg() with special handling for livethrough regs
@@ -1109,10 +1112,10 @@ bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum,
LRI->Reloaded = false;
}
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
- BundleVirtRegsMap[VirtReg] = PhysReg;
+ BundleVirtRegsMap[VirtReg] = *LRI;
}
markRegUsedInInstr(PhysReg);
- return setPhysReg(MI, MO, PhysReg);
+ return setPhysReg(MI, MO, *LRI);
}
/// Allocates a register for a VirtReg use.
@@ -1158,10 +1161,10 @@ bool RegAllocFastImpl::useVirtReg(MachineInstr &MI, MachineOperand &MO,
LRI->LastUse = &MI;
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
- BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+ BundleVirtRegsMap[VirtReg] = *LRI;
}
markRegUsedInInstr(LRI->PhysReg);
- return setPhysReg(MI, MO, LRI->PhysReg);
+ return setPhysReg(MI, MO, *LRI);
}
/// Query a physical register to use as a filler in contexts where the
@@ -1215,16 +1218,30 @@ MCPhysReg RegAllocFastImpl::getErrorAssignment(const LiveReg &LR,
/// Changes operand OpNum in MI the refer the PhysReg, considering subregs.
/// \return true if MI's MachineOperands were re-arranged/invalidated.
bool RegAllocFastImpl::setPhysReg(MachineInstr &MI, MachineOperand &MO,
- MCPhysReg PhysReg) {
+ const LiveReg &Assignment) {
+ MCPhysReg PhysReg = Assignment.PhysReg;
+
+ assert(PhysReg);
+
+ if (LLVM_UNLIKELY(Assignment.Error)) {
+ if (MO.isUse())
+ MO.setIsUndef(true);
+ }
+
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
- MO.setIsRenamable(true);
+ MO.setIsRenamable(!Assignment.Error);
return false;
}
+ assert(PhysReg);
+
// Handle subregister index.
MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : MCRegister());
- MO.setIsRenamable(true);
+
+ if (PhysReg)
+ MO.setIsRenamable(!Assignment.Error);
+
// Note: We leave the subreg number around a little longer in case of defs.
// This is so that the register freeing logic in allocateInstruction can still
// recognize this as subregister defs. The code there will clear the number.
@@ -1706,7 +1723,7 @@ void RegAllocFastImpl::handleDebugValue(MachineInstr &MI) {
if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
// Update every use of Reg within MI.
for (auto &RegMO : DbgOps)
- setPhysReg(MI, *RegMO, LRI->PhysReg);
+ setPhysReg(MI, *RegMO, *LRI);
} else {
DanglingDbgValues[Reg].push_back(&MI);
}
@@ -1729,8 +1746,7 @@ void RegAllocFastImpl::handleBundle(MachineInstr &MI) {
if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
- DenseMap<Register, MCPhysReg>::iterator DI;
- DI = BundleVirtRegsMap.find(Reg);
+ DenseMap<Register, LiveReg>::iterator DI = BundleVirtRegsMap.find(Reg);
assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
setPhysReg(MI, MO, DI->second);
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index b3a7acc15b3dc..4e4c89eba7aa9 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -598,6 +598,9 @@ void VirtRegRewriter::rewrite() {
SmallVector<Register, 8> SuperDefs;
SmallVector<Register, 8> SuperKills;
+ const bool IsValidAlloc = !MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedRegAlloc);
+
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
@@ -617,9 +620,7 @@ void VirtRegRewriter::rewrite() {
assert(Register(PhysReg).isPhysical());
RewriteRegs.insert(PhysReg);
- assert((!MRI->isReserved(PhysReg) ||
- MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedRegAlloc)) &&
+ assert((!MRI->isReserved(PhysReg) || !IsValidAlloc) &&
"Reserved register assignment");
// Preserve semantics of sub-register operands.
@@ -695,7 +696,14 @@ void VirtRegRewriter::rewrite() {
// Rewrite. Note we could have used MachineOperand::substPhysReg(), but
// we need the inlining here.
MO.setReg(PhysReg);
- MO.setIsRenamable(true);
+
+ // Defend against generating invalid flags in allocation failure
+ // scenarios. We have have assigned a register which was undefined, or a
+ // reserved register which cannot be renamable.
+ if (LLVM_LIKELY(IsValidAlloc))
+ MO.setIsRenamable(true);
+ else if (MO.isUse())
+ MO.setIsUndef(true);
}
// Add any missing super-register kills after rewriting the whole
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 99c27fa0bc95c..84ade2687f4ba 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -17,8 +17,8 @@
...
-# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
-# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
+# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3
---
name: foo
diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index e272bd3480383..ada15be594891 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,7 @@
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
# CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
---
name: issue48473
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
index 388a8e804a889..c5a05e63ba2d1 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll
@@ -1,8 +1,6 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
-
-; FIXME: Should pass verifier after failure.
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
index fe01728c00563..c9d0cf3893a2b 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -1,10 +1,8 @@
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# FIXME: We should not produce a verifier error after erroring
+# Make sure there's no machine verifier error after failure.
# ERR: error: inline assembly requires more registers than available
-# VERIFIER: *** Bad machine code: Using an undefined physical register ***
# This testcase cannot be compiled with the enforced register
# budget. Previously, tryLastChanceRecoloring would assert here. It
diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index 8e3054cceb85b..a9654be075692 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -1,17 +1,16 @@
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; This testcase fails register allocation at the same time it performs
; virtual register splitting (by introducing VGPR to AGPR copies). We
; still need to enqueue and allocate the newly split vregs after the
; failure.
-; The machine verifier complains about usage of register
-; which is marked as killed in previous instruction.
-; This happens due to when register allocator is out of registers
-; it takes the first avialable register.
+; The machine verifier should not complain about usage of register
+; which is marked as killed in previous instruction. This happens due
+; to when register allocator is out of registers it takes the first
+; avialable register.
; CHECK: error: <unknown>:0:0: ran out of registers during register allocation
-; CHECK: Bad machine code: Using an undefined physical register
define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
%agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
%agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0
diff --git a/llvm/test/CodeGen/X86/inline-asm-assertion.ll b/llvm/test/CodeGen/X86/inline-asm-assertion.ll
index d5507a1c44170..3b22abf8b691d 100644
--- a/llvm/test/CodeGen/X86/inline-asm-assertion.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-assertion.ll
@@ -1,8 +1,7 @@
; RUN: not llc -verify-machineinstrs -O0 < %s 2>&1 | FileCheck %s
-; RUN: not --crash llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2
+; RUN: not llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s
; CHECK: error: inline assembly requires more registers than available
; CHECK: .size main, .Lfunc_end0-main
-; CHECK-O2: error: inline assembly requires more registers than available
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
More information about the llvm-commits
mailing list