[llvm] edc38a6 - [AMDGPU] Add option to pre-allocate SGPR spill VGPRs (#70626)

via llvm-commits <llvm-commits at lists.llvm.org>
Sun Nov 12 19:21:21 PST 2023


Author: Carl Ritson
Date: 2023-11-13T12:21:18+09:00
New Revision: edc38a6cbd12a7cfa5d3c4de9767333c442ef2eb

URL: https://github.com/llvm/llvm-project/commit/edc38a6cbd12a7cfa5d3c4de9767333c442ef2eb
DIFF: https://github.com/llvm/llvm-project/commit/edc38a6cbd12a7cfa5d3c4de9767333c442ef2eb.diff

LOG: [AMDGPU] Add option to pre-allocate SGPR spill VGPRs (#70626)

SGPR spill VGPRs are WWM registers, so allow them to be allocated by the
SIPreAllocateWWMRegs pass.
When enabled, this intentionally prevents these VGPRs from being spilled.
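
As a rough illustration (the function @example and file name example.ll are
hypothetical, not part of this change), the pre-allocation can be requested
either globally through the new hidden llc option, or per-function through the
string attribute checked in runOnMachineFunction:

    ; llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -amdgpu-prealloc-sgpr-spill-vgprs=1 example.ll
    define void @example() "amdgpu-prealloc-sgpr-spill-vgprs" {
      ret void
    }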

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
    llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 0e78d3286e46635..0c57110b4eb15d9 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -28,6 +28,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
 
+static cl::opt<bool>
+    EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
+                                    cl::init(false), cl::Hidden);
+
 namespace {
 
 class SIPreAllocateWWMRegs : public MachineFunctionPass {
@@ -199,6 +203,10 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
 
   RegClassInfo.runOnMachineFunction(MF);
 
+  bool PreallocateSGPRSpillVGPRs =
+      EnablePreallocateSGPRSpillVGPRs ||
+      MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
+
   bool RegsAssigned = false;
 
   // We use a reverse post-order traversal of the control-flow graph to
@@ -215,8 +223,11 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
         RegsAssigned |= processDef(MI.getOperand(0));
 
-      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
-        continue;
+      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
+        if (!PreallocateSGPRSpillVGPRs)
+          continue;
+        RegsAssigned |= processDef(MI.getOperand(0));
+      }
 
       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||

diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
index 36b5e2a00f6d4d3..78871385f8ffcb2 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
@@ -4,6 +4,7 @@
 
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-O0 %s
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,WAVE64,WAVE64-O0 %s
+; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -amdgpu-prealloc-sgpr-spill-vgprs=1 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-WWM-PREALLOC %s
 
 declare ptr addrspace(5) @llvm.stacksave.p5()
 declare void @llvm.stackrestore.p5(ptr addrspace(5))
@@ -54,6 +55,16 @@ define void @func_store_stacksave() {
 ; WAVE64-O0-NEXT:    ; use s4
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_store_stacksave:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s4
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
   ret void
@@ -93,6 +104,15 @@ define amdgpu_kernel void @kernel_store_stacksave() {
 ; WAVE64-O0-NEXT:    ; use s0
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_endpgm
+;
+; WAVE32-WWM-PREALLOC-LABEL: kernel_store_stacksave:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s0, s0, 5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s0
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
   ret void
@@ -158,6 +178,22 @@ define amdgpu_kernel void @kernel_store_stacksave_nocall() {
 ; WAVE64-O0-NEXT:    v_mov_b32_e32 v1, s0
 ; WAVE64-O0-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
 ; WAVE64-O0-NEXT:    s_endpgm
+;
+; WAVE32-WWM-PREALLOC-LABEL: kernel_store_stacksave_nocall:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_getpc_b64 s[12:13]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s12, s0
+; WAVE32-WWM-PREALLOC-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x0
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_bitset0_b32 s15, 21
+; WAVE32-WWM-PREALLOC-NEXT:    s_add_u32 s12, s12, s11
+; WAVE32-WWM-PREALLOC-NEXT:    s_addc_u32 s13, s13, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s0, s0, 5
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v0, 0
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v1, s0
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v0, v1, s[12:15], 0 offen
+; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   store i32 0, ptr addrspace(5) %stacksave
   ret void
@@ -281,6 +317,36 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
 ; WAVE64-O0-NEXT:    s_mov_b64 exec, s[4:5]
 ; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_nonentry_block:
+; WAVE32-WWM-PREALLOC:       ; %bb.0: ; %bb0
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; WAVE32-WWM-PREALLOC-NEXT:    v_and_b32_e64 v0, 1, v0
+; WAVE32-WWM-PREALLOC-NEXT:    v_cmp_eq_u32_e64 s5, v0, 1
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, exec_lo
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v1, s4, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_and_b32 s4, s4, s5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_cbranch_execz .LBB4_2
+; WAVE32-WWM-PREALLOC-NEXT:  ; %bb.1: ; %bb1
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s4
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:  .LBB4_2: ; %bb2
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s4, v1, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_or_b32 exec_lo, exec_lo, s4
+; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr1
+; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
 bb0:
   br i1 %cond, label %bb1, label %bb2
 
@@ -321,6 +387,14 @@ define void @func_stackrestore_poison() {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_poison:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr4
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) poison)
   ret void
 }
@@ -353,6 +427,14 @@ define void @func_stackrestore_null() {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_null:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) null)
   ret void
 }
@@ -385,6 +467,14 @@ define void @func_stackrestore_neg1() {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_neg1:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, -1
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)))
   ret void
 }
@@ -417,6 +507,14 @@ define void @func_stackrestore_42() {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stackrestore_42:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, 42
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) inttoptr (i32 42 to ptr addrspace(5)))
   ret void
 }
@@ -445,6 +543,13 @@ define void @func_stacksave_stackrestore() {
 ; WAVE64-O0-NEXT:    s_mov_b32 s4, s32
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
   ret void
@@ -490,6 +595,17 @@ define void @func_stacksave_stackrestore_use() {
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_use:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s5, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
@@ -532,6 +648,16 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_use() {
 ; WAVE64-O0-NEXT:    ;;#ASMEND
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s0
 ; WAVE64-O0-NEXT:    s_endpgm
+;
+; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_use:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s1, s0, 5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s1
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s0
+; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stacksave)
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stacksave)
@@ -578,6 +704,17 @@ define void @func_stacksave_stackrestore_voffset(i32 %offset) {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_voffset:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s4, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    v_add_nc_u32_e64 v0, s4, v0
+; WAVE32-WWM-PREALLOC-NEXT:    v_readfirstlane_b32 s4, v0
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   %gep = getelementptr i8, ptr addrspace(5) %stacksave, i32 %offset
   call void @llvm.stackrestore.p5(ptr addrspace(5) %gep)
@@ -614,6 +751,14 @@ define void @func_stacksave_vgpr(ptr addrspace(5) %stack) {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s4, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s4
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_vgpr:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    v_readfirstlane_b32 s4, v0
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s4, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
   ret void
 }
@@ -644,6 +789,13 @@ define amdgpu_gfx void @func_stacksave_sgpr(ptr addrspace(5) inreg %stack) {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s34, s4, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s34
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_sgpr:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s34, s4, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s34
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
   ret void
 }
@@ -690,6 +842,18 @@ define amdgpu_kernel void @kernel_stacksave_sgpr(ptr addrspace(5) %stack) {
 ; WAVE64-O0-NEXT:    s_lshl_b32 s0, s0, 6
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s0
 ; WAVE64-O0-NEXT:    s_endpgm
+;
+; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_sgpr:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_load_dword s0, s[4:5], 0x0
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s1, s0
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s1
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshl_b32 s0, s0, 5
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s0
+; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %stack)
   call void @llvm.stackrestore.p5(ptr addrspace(5) %stack)
   ret void
@@ -985,6 +1149,118 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
 ; WAVE64-O0-NEXT:    s_mov_b32 s32, s0
 ; WAVE64-O0-NEXT:    ; kill: killed $vgpr0
 ; WAVE64-O0-NEXT:    s_endpgm
+;
+; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, 0x1200
+; WAVE32-WWM-PREALLOC-NEXT:    s_getpc_b64 s[20:21]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s20, s0
+; WAVE32-WWM-PREALLOC-NEXT:    s_load_dwordx4 s[20:23], s[20:21], 0x0
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_bitset0_b32 s23, 21
+; WAVE32-WWM-PREALLOC-NEXT:    s_add_u32 s20, s20, s11
+; WAVE32-WWM-PREALLOC-NEXT:    s_addc_u32 s21, s21, 0
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s14, s10
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s13, s9
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s12, s8
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[6:7], s[2:3]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[4:5], s[0:1]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s0, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s0, s0, 5
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s0, 1
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v3, 42
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v3, off, s[20:23], 0 offset:4
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt_vscnt null, 0x0
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[0:1], s[20:21]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[2:3], s[22:23]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s15, s32
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v3, 17
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v3, off, s[20:23], s15 offset:4
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s15, stack_passed_argument at abs32@hi
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s16, stack_passed_argument at abs32@lo
+; WAVE32-WWM-PREALLOC-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s17, s15
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s15, 20
+; WAVE32-WWM-PREALLOC-NEXT:    v_lshlrev_b32_e64 v2, s15, v2
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s15, 10
+; WAVE32-WWM-PREALLOC-NEXT:    v_lshlrev_b32_e64 v1, s15, v1
+; WAVE32-WWM-PREALLOC-NEXT:    v_or3_b32 v31, v0, v1, v2
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr15
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v0, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v1, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v2, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v3, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v4, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v5, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v6, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v7, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v8, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v9, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v10, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v11, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v12, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v13, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v14, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v15, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v16, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v17, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v18, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v19, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v20, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v21, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v22, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v23, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v24, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v25, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v26, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v27, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v28, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v29, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v30, s18
+; WAVE32-WWM-PREALLOC-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s1, v32, 1
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s0, v32, 0
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s1
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s0
+; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr32
+; WAVE32-WWM-PREALLOC-NEXT:    s_endpgm
   %alloca = alloca [32 x i32], addrspace(5)
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   store volatile i32 42, ptr addrspace(5) %alloca
@@ -1302,6 +1578,118 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
 ; WAVE64-O0-NEXT:    s_mov_b32 s33, s19
 ; WAVE64-O0-NEXT:    s_waitcnt vmcnt(0)
 ; WAVE64-O0-NEXT:    s_setpc_b64 s[30:31]
+;
+; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
+; WAVE32-WWM-PREALLOC:       ; %bb.0:
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s24, s33
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s33, s32
+; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s16, -1
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s16
+; WAVE32-WWM-PREALLOC-NEXT:    s_add_i32 s32, s32, 0x1200
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s30, 0
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v32, s31, 1
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s16, s32
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v33, s16, 0
+; WAVE32-WWM-PREALLOC-NEXT:    s_lshr_b32 s16, s16, 5
+; WAVE32-WWM-PREALLOC-NEXT:    v_writelane_b32 v33, s16, 1
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v0, 42
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v0, off, s[0:3], s33
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt_vscnt null, 0x0
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[22:23], s[2:3]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[20:21], s[0:1]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s16, s32
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v0, 17
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_store_dword v0, off, s[0:3], s16 offset:4
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s18, stack_passed_argument at abs32@hi
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s16, stack_passed_argument at abs32@lo
+; WAVE32-WWM-PREALLOC-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s17, s18
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[0:1], s[20:21]
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b64 s[2:3], s[22:23]
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v0, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v1, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v2, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v3, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v4, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v5, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v6, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v7, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v8, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v9, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v10, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v11, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v12, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v13, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v14, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v15, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v16, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v17, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v18, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v19, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v20, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v21, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v22, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v23, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v24, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v25, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v26, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v27, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v28, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v29, s18
+; WAVE32-WWM-PREALLOC-NEXT:    ; implicit-def: $sgpr18
+; WAVE32-WWM-PREALLOC-NEXT:    v_mov_b32_e32 v30, s18
+; WAVE32-WWM-PREALLOC-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s5, v33, 1
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s4, v33, 0
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMSTART
+; WAVE32-WWM-PREALLOC-NEXT:    ; use s5
+; WAVE32-WWM-PREALLOC-NEXT:    ;;#ASMEND
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s32, s4
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s31, v32, 1
+; WAVE32-WWM-PREALLOC-NEXT:    v_readlane_b32 s30, v32, 0
+; WAVE32-WWM-PREALLOC-NEXT:    ; kill: killed $vgpr33
+; WAVE32-WWM-PREALLOC-NEXT:    s_xor_saveexec_b32 s4, -1
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT:    buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 exec_lo, s4
+; WAVE32-WWM-PREALLOC-NEXT:    s_add_i32 s32, s32, 0xffffee00
+; WAVE32-WWM-PREALLOC-NEXT:    s_mov_b32 s33, s24
+; WAVE32-WWM-PREALLOC-NEXT:    s_waitcnt vmcnt(0)
+; WAVE32-WWM-PREALLOC-NEXT:    s_setpc_b64 s[30:31]
   %alloca = alloca [32 x i32], addrspace(5)
   %stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
   store volatile i32 42, ptr addrspace(5) %alloca


        

