[llvm] [AMDGPU][RegAlloc] Correct insertion of IMPLICIT_DEF in loop headers (PR #186348)

Fri Mar 13 03:02:39 PDT 2026

https://github.com/chrisjbris updated https://github.com/llvm/llvm-project/pull/186348

>From 2b23aae00744a2fb908f3d2a57d7c3aec8c7793b Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Fri, 13 Mar 2026 04:22:48 -0500
Subject: [PATCH 1/3] [AMDGPU][RegAlloc] Correct insertion of IMPLICIT_DEF in
 loop headers

si-lower-sgpr-spills was observed inserting the IMPLICIT_DEF for lane VGPR
restores in the loop header. The virtual VGPR is therefore not live-in
to the header and wwm regallocfast does not insert a restore. This
results in the VGPR being clobbered after each backedge.

Correct this by inserting the IMPLICIT_DEF in the preheader.
---
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp  | 20 +++++
 .../si-lower-sgpr-spills-loop-preheader.mir   | 81 +++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir

diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 196e551932659..e51acead07565 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -345,6 +345,26 @@ void SILowerSGPRSpills::updateLaneVGPRDomInstr(
       else if (DomMBB != PrevInsertPt->getParent())
         I->second = &(*DomMBB->getFirstTerminator());
     }
+    // The insertion point should not be placed in MBB if it is a loop header.
+    // This results in the VGPR being clobbered after every backedge. Instead,
+    // insert into the preheader. Then the virtual register is live-in to the
+    // header and wwm-regalloc will insert a wwm restore in the header.
+    // Machine Loop Analysis could be used to locate the preheader, but that
+    // extra dependency is unnecessary.
+    auto &BlockInsertPt = LaneVGPRDomInstr[Spill.VGPR];
+    MachineBasicBlock *FinalMBB = BlockInsertPt->getParent();
+    if(FinalMBB == MBB)
+      continue;
+    // If the header dominates a predecessor, then we have a backedge.
+    // The preheader is the immediate dominator of the header.
+    for (MachineBasicBlock *Pred : FinalMBB->predecessors()) {
+      if (MDT->dominates(FinalMBB, Pred)) {
+        if (auto *IDomNode = MDT->getNode(FinalMBB)->getIDom()) {
+          BlockInsertPt = IDomNode->getBlock()->getFirstTerminator();
+        }
+        break;
+      }
+    }
   }
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
new file mode 100644
index 0000000000000..e598ec8a6c430
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
@@ -0,0 +1,81 @@
+# When SGPR spills to a virtual VGPR lane occur in both a loop header and the latch,
+# the IMPLICIT_DEF for the lane VGPR must be placed in the preheader (not the header).
+# Establish that the virtual VGPR is live-in to the header and wwm regallocfast inserts
+# a restore, preserving the latch writes. 
+#
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
+# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
+#
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
+# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx906 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
+#
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
+# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx908 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
+
+---
+name:            sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+  hasSpilledSGPRs: true
+body:             |
+  ; Preheader (bb.0):
+  ; GCN-LABEL: name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT:   liveins:
+  ; GCN:        S_NOP 0
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN-NEXT:   successors:
+  ; GCN-NEXT:   liveins:
+  ; GCN-NOT:   IMPLICIT_DEF
+  ; GCN:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN-NEXT:   S_CMP_EQ_U32
+  ; GCN:   [[DEF]]:vgpr_32 = SI_SPILL_S32_TO_VGPR
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2
+  ; GCN-NEXT:   S_BRANCH %bb.3
+  ; GCN: bb.2:
+  ; GCN:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN:   [[DEF]]:vgpr_32 = SI_SPILL_S32_TO_VGPR
+  ; GCN: bb.3:
+  ; GCN:   S_SETPC_B64
+
+
+  ; WWM: bb.1:
+  ; WWM:   [[LANE_VGPR:\$vgpr[0-9]+]] = SI_SPILL_WWM_V32_RESTORE [[WWM_STACK:%stack.[0-9]+]], $sgpr32
+  ; WWM:   SI_SPILL_WWM_V32_SAVE {{(killed )?}}[[LANE_VGPR]], [[WWM_STACK]], $sgpr32
+  ; WWM: bb.2:
+  ; WWM:   [[LANE_VGPR]] = SI_SPILL_WWM_V32_RESTORE [[WWM_STACK]], $sgpr32
+  ; WWM:   SI_SPILL_WWM_V32_SAVE {{(killed )?}}[[LANE_VGPR]], [[WWM_STACK]], $sgpr32
+  bb.0:
+    liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+    S_NOP 0
+    S_BRANCH %bb.1
+  bb.1:
+    liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+    renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+    SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+    S_BRANCH %bb.3
+  bb.2:
+    liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+    renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    $sgpr10 = S_MOV_B32 1
+    SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+    S_BRANCH %bb.1
+  bb.3:
+    liveins: $sgpr30_sgpr31
+    S_SETPC_B64 $sgpr30_sgpr31

>From 4626be5463ee9aae96bed6fa474cc3aff0596d2b Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Fri, 13 Mar 2026 04:59:53 -0500
Subject: [PATCH 2/3] Simplify the new test and update existing test

---
 .../si-lower-sgpr-spills-loop-preheader.mir    | 18 ++++++------------
 .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir      |  6 +++---
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
index e598ec8a6c430..272cd89e3beb7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
@@ -1,19 +1,11 @@
+
 # When SGPR spills to a virtual VGPR lane occur in both a loop header and the latch,
 # the IMPLICIT_DEF for the lane VGPR must be placed in the preheader (not the header).
 # Establish that the virtual VGPR is live-in to the header and wwm regallocfast inserts
-# a restore, preserving the latch writes. 
-#
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
-# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
-#
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
-# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx906 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
+# a restore, preserving the latch writes.
 #
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o %t.gcn.mir %s
-# RUN: FileCheck -check-prefix=GCN %s < %t.gcn.mir
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx908 -start-before=si-pre-allocate-wwm-regs -stop-after=regallocfast,1 -verify-machineinstrs %t.gcn.mir -o - | FileCheck -check-prefix=WWM %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -start-before=si-lower-sgpr-spills -stop-after=regallocfast,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=WWM %s
 
 ---
 name:            sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
@@ -79,3 +71,5 @@ body:             |
   bb.3:
     liveins: $sgpr30_sgpr31
     S_SETPC_B64 $sgpr30_sgpr31
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index fa3fd3bc6da5b..bb47603647733 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -241,13 +241,14 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   S_NOP 0
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.2
   ; GCN-NEXT: {{  $}}
@@ -255,7 +256,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
   ; GCN-NEXT:   $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
   ; GCN-NEXT:   S_BRANCH %bb.3
   ; GCN-NEXT: {{  $}}
@@ -264,7 +265,6 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr10 = S_MOV_B32 10
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
   ; GCN-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc

>From c04c45ce10e9be7243ae9e478865a551a636e15e Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Fri, 13 Mar 2026 05:02:24 -0500
Subject: [PATCH 3/3] Minor test auto-update.

---
 .../si-lower-sgpr-spills-loop-preheader.mir   | 88 ++++++++++++++-----
 1 file changed, 65 insertions(+), 23 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
index 272cd89e3beb7..cb9411b615a96 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-loop-preheader.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 
 # When SGPR spills to a virtual VGPR lane occur in both a loop header and the latch,
 # the IMPLICIT_DEF for the lane VGPR must be placed in the preheader (not the header).
@@ -21,36 +22,78 @@ machineFunctionInfo:
   frameOffsetReg: '$sgpr33'
   hasSpilledSGPRs: true
 body:             |
-  ; Preheader (bb.0):
   ; GCN-LABEL: name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
   ; GCN: bb.0:
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT:   liveins:
-  ; GCN:        S_NOP 0
+  ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_NOP 0
   ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN: bb.1:
-  ; GCN-NEXT:   successors:
-  ; GCN-NEXT:   liveins:
-  ; GCN-NOT:   IMPLICIT_DEF
-  ; GCN:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
-  ; GCN-NEXT:   S_CMP_EQ_U32
-  ; GCN:   [[DEF]]:vgpr_32 = SI_SPILL_S32_TO_VGPR
-  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
   ; GCN-NEXT:   S_BRANCH %bb.3
-  ; GCN: bb.2:
-  ; GCN:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
-  ; GCN:   [[DEF]]:vgpr_32 = SI_SPILL_S32_TO_VGPR
-  ; GCN: bb.3:
-  ; GCN:   S_SETPC_B64
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+  ; GCN-NEXT:   $sgpr10 = S_MOV_B32 1
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT:   liveins: $sgpr30_sgpr31
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_SETPC_B64 $sgpr30_sgpr31
+  ;
+  ; WWM-LABEL: name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+  ; WWM: bb.0:
+  ; WWM-NEXT:   successors: %bb.1(0x80000000)
+  ; WWM-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT:   S_NOP 0
+  ; WWM-NEXT:   renamable $vgpr63 = IMPLICIT_DEF
+  ; WWM-NEXT:   S_BRANCH %bb.1
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT: bb.1:
+  ; WWM-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; WWM-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT:   $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; WWM-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+  ; WWM-NEXT:   S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+  ; WWM-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+  ; WWM-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; WWM-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+  ; WWM-NEXT:   S_BRANCH %bb.3
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT: bb.2:
+  ; WWM-NEXT:   successors: %bb.1(0x80000000)
+  ; WWM-NEXT:   liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT:   $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; WWM-NEXT:   dead $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+  ; WWM-NEXT:   $sgpr10 = S_MOV_B32 1
+  ; WWM-NEXT:   $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+  ; WWM-NEXT:   SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; WWM-NEXT:   S_BRANCH %bb.1
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT: bb.3:
+  ; WWM-NEXT:   liveins: $sgpr30_sgpr31
+  ; WWM-NEXT: {{  $}}
+  ; WWM-NEXT:   S_SETPC_B64 killed $sgpr30_sgpr31
+  ; Preheader (bb.0):
 
 
-  ; WWM: bb.1:
-  ; WWM:   [[LANE_VGPR:\$vgpr[0-9]+]] = SI_SPILL_WWM_V32_RESTORE [[WWM_STACK:%stack.[0-9]+]], $sgpr32
-  ; WWM:   SI_SPILL_WWM_V32_SAVE {{(killed )?}}[[LANE_VGPR]], [[WWM_STACK]], $sgpr32
-  ; WWM: bb.2:
-  ; WWM:   [[LANE_VGPR]] = SI_SPILL_WWM_V32_RESTORE [[WWM_STACK]], $sgpr32
-  ; WWM:   SI_SPILL_WWM_V32_SAVE {{(killed )?}}[[LANE_VGPR]], [[WWM_STACK]], $sgpr32
   bb.0:
     liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
     S_NOP 0
@@ -72,4 +115,3 @@ body:             |
     liveins: $sgpr30_sgpr31
     S_SETPC_B64 $sgpr30_sgpr31
 ## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-# GCN: {{.*}}