[llvm] d28bb1f - [AMDGPU] Ensure non-reserved CSR spilled regs are live-in (#146427)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 09:54:29 PDT 2025


Author: macurtis-amd
Date: 2025-08-01T11:54:25-05:00
New Revision: d28bb1fd786677fca73390a57972d3c722c30dcd

URL: https://github.com/llvm/llvm-project/commit/d28bb1fd786677fca73390a57972d3c722c30dcd
DIFF: https://github.com/llvm/llvm-project/commit/d28bb1fd786677fca73390a57972d3c722c30dcd.diff

LOG: [AMDGPU] Ensure non-reserved CSR spilled regs are live-in (#146427)

Fixes:
```
*** Bad machine code: Using an undefined physical register ***
- function:    widget
- basic block: %bb.0 bb (0x564092cbe140)
- instruction: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- operand 1:   killed $agpr13
LLVM ERROR: Found 1 machine code errors.
```

The detailed sequence of events that led to this assert:

1. MachineVerifier fails because `$agpr13` is not defined on line 19
below:

   ``` 1: bb.0.bb:
    2:   successors: %bb.1(0x80000000); %bb.1(100.00%)
    3:   liveins: $agpr14, $agpr15, $sgpr12, $sgpr13, $sgpr14, \
    4:       $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, \
    5:       $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr48, \
    6:       $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, \
    7:       $sgpr54, $sgpr55, $sgpr64, $sgpr65, $sgpr66, \
    8:       $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, \
    9:       $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, \
   10:       $sgpr85, $sgpr86, $sgpr87, $sgpr96, $sgpr97, \
   11:       $sgpr98, $sgpr99, $vgpr0, $vgpr31, $vgpr40, $vgpr41, \
   12:       $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, \
   13:       $sgpr10_sgpr11
   14:   $sgpr16 = COPY $sgpr33
   15:   $sgpr33 = frame-setup COPY $sgpr32
   16:   $sgpr18_sgpr19 = S_XOR_SAVEEXEC_B64 -1, \
   17:       implicit-def $exec, implicit-def dead $scc, \
   18:       implicit $exec
   19:   $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, \
   20:       implicit $exec
   21:   BUFFER_STORE_DWORD_OFFSET killed $vgpr63, \
   22:       $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, \
   23:       implicit $exec :: (store (s32) into %stack.38, \
   24:       addrspace 5)
   25:   ...
   26:   $vgpr43 = IMPLICIT_DEF
   27:   $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, \
   28:       killed $vgpr43(tied-def 0)
   29:   $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, \
   30:       killed $vgpr43(tied-def 0)
   31:   $sgpr100_sgpr101 = S_OR_SAVEEXEC_B64 -1, \
   32:       implicit-def $exec, implicit-def dead $scc, \
   33:       implicit $exec
   34:   renamable $agpr13 = COPY killed $vgpr43, implicit $exec
   ```

2. That instruction is created by
[`emitCSRSpillStores`](https://github.com/llvm/llvm-project/blob/d599bdeaa49d7a2b1246328630328d23ddda5a47/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp#L977)
(called by
[`SIFrameLowering::emitPrologue`](https://github.com/llvm/llvm-project/blob/d599bdeaa49d7a2b1246328630328d23ddda5a47/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp#L1122))
because `$agpr13` is in `WWMSpills`.

   See lines 982, 998, and 993 below.

   ```
977: // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the
scratch
978: // registers. However, save all lanes of callee-saved VGPRs. Due to
this, we
   979:   // might end up flipping the EXEC bits twice.
   980:   Register ScratchExecCopy;
981: SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs,
WWMScratchRegs;
982: FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs,
WWMScratchRegs);
   983:   if (!WWMScratchRegs.empty())
   984:     ScratchExecCopy =
   985:         buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
986: /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
   987:
   988:   auto StoreWWMRegisters =
   989:       [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
   990:         for (const auto &Reg : WWMRegs) {
   991:           Register VGPR = Reg.first;
   992:           int FI = Reg.second;
993: buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
   994:                            VGPR, FI, FrameReg);
   995:         }
   996:       };
   997:
   998:   StoreWWMRegisters(WWMScratchRegs);
   ```

3. `$agpr13` got added to `WWMSpills` by
[`SILowerWWMCopies::run`](https://github.com/llvm/llvm-project/blob/59a7185dd9d69cbf737a98f5c2d1cf3d456bee03/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp#L137)
as it processed the `WWM_COPY` on line 3 below (corresponds to line 34
above in point #_1_):

   ```
1: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, %45:vgpr_32(tied-def
0)
2: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, %45:vgpr_32(tied-def
0)
   3: %44:av_32 = WWM_COPY %45:vgpr_32
   ```

Added: 
    llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 11552b3a9a438..9b348d46fec4f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -983,6 +983,7 @@ void SIFrameLowering::emitCSRSpillStores(
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
   // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1005,6 +1006,12 @@ void SIFrameLowering::emitCSRSpillStores(
         }
       };
 
+  for (const Register Reg : make_first_range(WWMScratchRegs)) {
+    if (!MRI.isReserved(Reg)) {
+      MRI.addLiveIn(Reg);
+      MBB.addLiveIn(Reg);
+    }
+  }
   StoreWWMRegisters(WWMScratchRegs);
 
   auto EnableAllLanes = [&]() {

diff  --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
new file mode 100644
index 0000000000000..7336a54ae42db
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name:            widget
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack:    true
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; GCN-LABEL: name: widget
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT:   $exec = S_MOV_B64 -1
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GCN-NEXT:   renamable $vgpr62 = IMPLICIT_DEF
+  ; GCN-NEXT:   $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
+  ; GCN-NEXT:   $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   renamable $agpr0 = COPY killed renamable $vgpr62
+  ; GCN-NEXT:   $exec = S_MOV_B64 killed $noreg
+  ; GCN-NEXT:   renamable $vgpr62 = IMPLICIT_DEF
+  ; GCN-NEXT:   dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT:   liveins: $agpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.3
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; GCN-NEXT:   liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.4
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT:   liveins: $agpr0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   renamable $vgpr62 = COPY renamable $agpr0
+  ; GCN-NEXT:   $exec = S_MOV_B64 killed $noreg
+  ; GCN-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.4:
+  ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+  ; GCN-NEXT:   $exec = S_MOV_B64 -1
+  ; GCN-NEXT:   $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GCN-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15
+
+    %45:vgpr_32 = IMPLICIT_DEF
+    SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    %16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec
+
+  bb.1:
+    successors: %bb.3, %bb.2
+
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+    liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    $sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    S_BRANCH %bb.2
+
+  bb.4:
+    SI_RETURN
+
+...


        


More information about the llvm-commits mailing list