[llvm] [AMDGPU] Ensure non-reserved CSR spilled regs are live-in (PR #146427)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 7 06:53:58 PDT 2025
https://github.com/macurtis-amd updated https://github.com/llvm/llvm-project/pull/146427
>From edc928890db8ba41ec1a712b9e1b3300cd4337fe Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Mon, 30 Jun 2025 16:23:55 -0500
Subject: [PATCH 1/4] [AMDGPU] Ensure non-reserved CSR spilled regs are live-in
Fixes:
```
*** Bad machine code: Using an undefined physical register ***
- function: widget
- basic block: %bb.0 bb (0x564092cbe140)
- instruction: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- operand 1: killed $agpr13
LLVM ERROR: Found 1 machine code errors.
```
The detailed sequence of events that led to this change:
1. MachineVerifier fails because $agpr13 is not defined on line 19 below:
```
1: bb.0.bb:
2: successors: %bb.1(0x80000000); %bb.1(100.00%)
3: liveins: $agpr14, $agpr15, $sgpr12, $sgpr13, $sgpr14, \
4: $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, \
5: $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr48, \
6: $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, \
7: $sgpr54, $sgpr55, $sgpr64, $sgpr65, $sgpr66, \
8: $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, \
9: $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, \
10: $sgpr85, $sgpr86, $sgpr87, $sgpr96, $sgpr97, \
11: $sgpr98, $sgpr99, $vgpr0, $vgpr31, $vgpr40, $vgpr41, \
12: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, \
13: $sgpr10_sgpr11
14: $sgpr16 = COPY $sgpr33
15: $sgpr33 = frame-setup COPY $sgpr32
16: $sgpr18_sgpr19 = S_XOR_SAVEEXEC_B64 -1, \
17: implicit-def $exec, implicit-def dead $scc, \
18: implicit $exec
19: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, \
20: implicit $exec
21: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, \
22: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, \
23: implicit $exec :: (store (s32) into %stack.38, \
24: addrspace 5)
25: ...
26: $vgpr43 = IMPLICIT_DEF
27: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, \
28: killed $vgpr43(tied-def 0)
29: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, \
30: killed $vgpr43(tied-def 0)
31: $sgpr100_sgpr101 = S_OR_SAVEEXEC_B64 -1, \
32: implicit-def $exec, implicit-def dead $scc, \
33: implicit $exec
34: renamable $agpr13 = COPY killed $vgpr43, implicit $exec
```
2. That instruction is created by emitCSRSpillStores (called by
SIFrameLowering::emitPrologue) because $agpr13 is in WWMSpills.
See lines 982, 998, and 993 below.
```
977: // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
978: // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
979: // might end up flipping the EXEC bits twice.
980: Register ScratchExecCopy;
981: SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
982: FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
983: if (!WWMScratchRegs.empty())
984: ScratchExecCopy =
985: buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
986: /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
987:
988: auto StoreWWMRegisters =
989: [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
990: for (const auto &Reg : WWMRegs) {
991: Register VGPR = Reg.first;
992: int FI = Reg.second;
993: buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
994: VGPR, FI, FrameReg);
995: }
996: };
997:
998: StoreWWMRegisters(WWMScratchRegs);
```
3. $agpr13 got added to WWMSpills by SILowerWWMCopies::runOnMachineFunction
as it processed the WWM_COPY on line 11 below (corresponds to line 34
above in point #1):
```
1: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, %45:vgpr_32(tied-def 0)
2: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, %45:vgpr_32(tied-def 0)
3: %44:av_32 = WWM_COPY %45:vgpr_32
```
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 7 +
.../CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 126 ++++++++++++++++++
2 files changed, 133 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 6a3867937d57f..15ad1470036d2 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -973,6 +973,7 @@ void SIFrameLowering::emitCSRSpillStores(
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
// registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -995,6 +996,12 @@ void SIFrameLowering::emitCSRSpillStores(
}
};
+ for (const auto &Reg : WWMScratchRegs) {
+ if (!MRI.isReserved(Reg.first)) {
+ MRI.addLiveIn(Reg.first);
+ MBB.addLiveIn(Reg.first);
+ }
+ }
StoreWWMRegisters(WWMScratchRegs);
if (!WWMCalleeSavedRegs.empty()) {
if (ScratchExecCopy) {
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
new file mode 100644
index 0000000000000..ef2ae5ca5169b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
@@ -0,0 +1,126 @@
+; Just ensure that llc -O1 does not error out
+; RUN: llc -O1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs %s -o - &>/dev/null
+
+define fastcc void @widget(i1 %arg) {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb3, %bb
+ br i1 %arg, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ ret void
+
+bb3: ; preds = %bb1
+ %call = call fastcc i1 @baz(i1 false, float 0.000000e+00, i1 false, float 0.000000e+00, i1 false, i1 false, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, ptr addrspace(5) null, i1 false, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr addrspace(5) null)
+ br label %bb1
+}
+
+define fastcc i1 @baz(i1 %arg, float %arg1, i1 %arg2, float %arg3, i1 %arg4, i1 %arg5, float %arg6, float %arg7, float %arg8, float %arg9, ptr addrspace(5) %arg10, i1 %arg11, ptr %arg12, ptr %arg13, ptr %arg14, ptr %arg15, ptr %arg16, ptr addrspace(5) %arg17, ptr %arg18, ptr %arg19, ptr %arg20, ptr %arg21, ptr %arg22, ptr %arg23, ptr %arg24, ptr addrspace(5) %arg25) #0 {
+bb:
+ br i1 %arg, label %bb26, label %bb27
+
+bb26: ; preds = %bb
+ ret i1 false
+
+bb27: ; preds = %bb
+ br i1 %arg, label %bb29, label %bb28
+
+bb28: ; preds = %bb27
+ unreachable
+
+bb29: ; preds = %bb49, %bb47, %bb46, %bb39, %bb36, %bb27
+ br i1 %arg4, label %bb55, label %bb30
+
+bb30: ; preds = %bb29
+ br i1 %arg5, label %bb31, label %bb32
+
+bb31: ; preds = %bb30
+ store i1 false, ptr addrspace(5) null, align 2147483648
+ br label %bb55
+
+bb32: ; preds = %bb30
+ store float %arg3, ptr addrspace(5) null, align 2147483648
+ store float %arg7, ptr addrspace(5) %arg10, align 2147483648
+ br i1 %arg2, label %bb34, label %bb33
+
+bb33: ; preds = %bb32
+ %fcmp = fcmp ogt float %arg6, 0.000000e+00
+ br i1 %fcmp, label %bb34, label %bb35
+
+bb34: ; preds = %bb33, %bb32
+ br i1 %arg11, label %bb37, label %bb36
+
+bb35: ; preds = %bb33
+ store float 0.000000e+00, ptr addrspace(5) null, align 2147483648
+ ret i1 false
+
+bb36: ; preds = %bb34
+ store i32 1, ptr addrspace(5) null, align 2147483648
+ br label %bb29
+
+bb37: ; preds = %bb34
+ %load = load i8, ptr %arg12, align 2
+ %trunc = trunc i8 %load to i1
+ br i1 %trunc, label %bb38, label %bb54
+
+bb38: ; preds = %bb37
+ br i1 %arg4, label %bb39, label %bb53
+
+bb39: ; preds = %bb38
+ store float %arg1, ptr addrspace(5) null, align 2147483648
+ %load40 = load float, ptr %arg15, align 8
+ call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg25, ptr %arg24, i64 12, i1 false)
+ %load41 = load float, ptr %arg16, align 4
+ call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg17, ptr null, i64 36, i1 false)
+ %load42 = load float, ptr %arg18, align 4
+ %load43 = load float, ptr %arg19, align 4
+ store float 0x7FF8000000000000, ptr addrspace(5) null, align 2147483648
+ %load44 = load float, ptr %arg14, align 16
+ store float %load44, ptr addrspace(5) null, align 2147483648
+ %fcmp45 = fcmp ole float %arg9, 0.000000e+00
+ br i1 %fcmp45, label %bb29, label %bb46
+
+bb46: ; preds = %bb39
+ %fsub = fsub float %arg8, %load40
+ store float %fsub, ptr addrspace(5) null, align 2147483648
+ %fadd = fadd float %load42, %load43
+ br i1 %arg, label %bb29, label %bb47
+
+bb47: ; preds = %bb46
+ br i1 %arg, label %bb29, label %bb48
+
+bb48: ; preds = %bb47
+ br i1 %arg, label %bb49, label %bb52
+
+bb49: ; preds = %bb48
+ store float 0.000000e+00, ptr %arg23, align 4
+ store float 0.000000e+00, ptr %arg22, align 8
+ store float %fadd, ptr addrspace(5) null, align 2147483648
+ %load50 = load float, ptr %arg20, align 4
+ %fdiv = fdiv float %load41, %load50
+ store float %fdiv, ptr addrspace(5) null, align 2147483648
+ %load51 = load float, ptr %arg13, align 16
+ store float %load51, ptr addrspace(5) null, align 2147483648
+ store float 1.000000e+00, ptr %arg21, align 4
+ br label %bb29
+
+bb52: ; preds = %bb48
+ unreachable
+
+bb53: ; preds = %bb38
+ ret i1 false
+
+bb54: ; preds = %bb37
+ ret i1 true
+
+bb55: ; preds = %bb31, %bb29
+ %load56 = load i1, ptr addrspace(5) null, align 2147483648
+ ret i1 %load56
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1
+
+attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
>From 4cd9726174e574fcf4a9c04f4aa1a0dabe61834d Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Tue, 1 Jul 2025 06:17:20 -0500
Subject: [PATCH 2/4] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are
live-in
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 8 +++---
.../CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 27 +++++++++----------
2 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 15ad1470036d2..51cd5d0cfd9dd 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -996,10 +996,10 @@ void SIFrameLowering::emitCSRSpillStores(
}
};
- for (const auto &Reg : WWMScratchRegs) {
- if (!MRI.isReserved(Reg.first)) {
- MRI.addLiveIn(Reg.first);
- MBB.addLiveIn(Reg.first);
+ for (const auto &Reg : make_first_range(WWMScratchRegs)) {
+ if (!MRI.isReserved(Reg)) {
+ MRI.addLiveIn(Reg);
+ MBB.addLiveIn(Reg);
}
}
StoreWWMRegisters(WWMScratchRegs);
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
index ef2ae5ca5169b..e6ad4aaff88a6 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
@@ -36,12 +36,12 @@ bb30: ; preds = %bb29
br i1 %arg5, label %bb31, label %bb32
bb31: ; preds = %bb30
- store i1 false, ptr addrspace(5) null, align 2147483648
+ store i1 false, ptr addrspace(5) %arg17, align 8
br label %bb55
bb32: ; preds = %bb30
- store float %arg3, ptr addrspace(5) null, align 2147483648
- store float %arg7, ptr addrspace(5) %arg10, align 2147483648
+ store float %arg3, ptr addrspace(5) %arg25, align 8
+ store float %arg7, ptr addrspace(5) %arg10, align 8
br i1 %arg2, label %bb34, label %bb33
bb33: ; preds = %bb32
@@ -52,11 +52,11 @@ bb34: ; preds = %bb33, %bb32
br i1 %arg11, label %bb37, label %bb36
bb35: ; preds = %bb33
- store float 0.000000e+00, ptr addrspace(5) null, align 2147483648
+ store float 0.000000e+00, ptr addrspace(5) %arg25, align 8
ret i1 false
bb36: ; preds = %bb34
- store i32 1, ptr addrspace(5) null, align 2147483648
+ store i32 1, ptr addrspace(5) %arg17, align 8
br label %bb29
bb37: ; preds = %bb34
@@ -68,22 +68,22 @@ bb38: ; preds = %bb37
br i1 %arg4, label %bb39, label %bb53
bb39: ; preds = %bb38
- store float %arg1, ptr addrspace(5) null, align 2147483648
+ store float %arg1, ptr addrspace(5) %arg25, align 8
%load40 = load float, ptr %arg15, align 8
call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg25, ptr %arg24, i64 12, i1 false)
%load41 = load float, ptr %arg16, align 4
call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) %arg17, ptr null, i64 36, i1 false)
%load42 = load float, ptr %arg18, align 4
%load43 = load float, ptr %arg19, align 4
- store float 0x7FF8000000000000, ptr addrspace(5) null, align 2147483648
+ store float 0x7FF8000000000000, ptr addrspace(5) %arg25, align 8
%load44 = load float, ptr %arg14, align 16
- store float %load44, ptr addrspace(5) null, align 2147483648
+ store float %load44, ptr addrspace(5) %arg25, align 8
%fcmp45 = fcmp ole float %arg9, 0.000000e+00
br i1 %fcmp45, label %bb29, label %bb46
bb46: ; preds = %bb39
%fsub = fsub float %arg8, %load40
- store float %fsub, ptr addrspace(5) null, align 2147483648
+ store float %fsub, ptr addrspace(5) %arg25, align 8
%fadd = fadd float %load42, %load43
br i1 %arg, label %bb29, label %bb47
@@ -96,12 +96,12 @@ bb48: ; preds = %bb47
bb49: ; preds = %bb48
store float 0.000000e+00, ptr %arg23, align 4
store float 0.000000e+00, ptr %arg22, align 8
- store float %fadd, ptr addrspace(5) null, align 2147483648
+ store float %fadd, ptr addrspace(5) %arg25, align 8
%load50 = load float, ptr %arg20, align 4
%fdiv = fdiv float %load41, %load50
- store float %fdiv, ptr addrspace(5) null, align 2147483648
+ store float %fdiv, ptr addrspace(5) %arg25, align 8
%load51 = load float, ptr %arg13, align 16
- store float %load51, ptr addrspace(5) null, align 2147483648
+ store float %load51, ptr addrspace(5) %arg25, align 8
store float 1.000000e+00, ptr %arg21, align 4
br label %bb29
@@ -115,11 +115,10 @@ bb54: ; preds = %bb37
ret i1 true
bb55: ; preds = %bb31, %bb29
- %load56 = load i1, ptr addrspace(5) null, align 2147483648
+ %load56 = load i1, ptr addrspace(5) null, align 8
ret i1 %load56
}
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
>From 586c2ff927725521fb4c4c343ebaad6b1c7aa47c Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Tue, 1 Jul 2025 09:36:45 -0500
Subject: [PATCH 3/4] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are
live-in
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 51cd5d0cfd9dd..20cbfc82d0c9b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -996,7 +996,7 @@ void SIFrameLowering::emitCSRSpillStores(
}
};
- for (const auto &Reg : make_first_range(WWMScratchRegs)) {
+ for (const Register &Reg : make_first_range(WWMScratchRegs)) {
if (!MRI.isReserved(Reg)) {
MRI.addLiveIn(Reg);
MBB.addLiveIn(Reg);
>From b0b218a237b974b547a2d93e99e764c5c099e707 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Mon, 7 Jul 2025 08:44:25 -0500
Subject: [PATCH 4/4] fixup! [AMDGPU] Ensure non-reserved CSR spilled regs are
live-in
---
llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
index e6ad4aaff88a6..87d44803f7e19 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.ll
@@ -115,7 +115,7 @@ bb54: ; preds = %bb37
ret i1 true
bb55: ; preds = %bb31, %bb29
- %load56 = load i1, ptr addrspace(5) null, align 8
+ %load56 = load i1, ptr addrspace(5) %arg25, align 8
ret i1 %load56
}
More information about the llvm-commits
mailing list