[llvm] [AMDGPU] Add mark last scratch load pass (PR #75512)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 06:28:54 PST 2024
https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/75512
From 4d184620caf5327de2126c474a4aa1795f9197c8 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 14 Dec 2023 19:11:36 +0100
Subject: [PATCH 1/9] [AMDGPU] Add last_use operand for SI_SPILL_*_RESTORE
insts
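
This first patch only threads a new `last_use` immediate through the
SI_SPILL_*_RESTORE pseudos (always 0, i.e. "not a last use", at this
point) and updates the affected MIR tests; the pass that actually sets
the operand is added later in this PR. As a rough illustration of how a
consumer of the new operand could look -- this helper and its name are
hypothetical, not part of the patch -- the immediate is simply the final
explicit operand of the restore pseudo:

  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Sketch: flag a SI_SPILL_*_RESTORE as the last use of its stack slot.
  // After this patch the explicit operand layout is
  //   $vdata = SI_SPILL_V*_RESTORE $vaddr, $soffset, $offset, $last_use
  //   $data  = SI_SPILL_S*_RESTORE $addr, $last_use
  // so in both cases last_use is the final explicit operand; the
  // implicit uses ($exec, $sgpr32, ...) come after it and are not
  // counted by getNumExplicitOperands().
  static void markRestoreAsLastUse(MachineInstr &MI) {
    unsigned LastUseIdx = MI.getNumExplicitOperands() - 1;
    MI.getOperand(LastUseIdx).setImm(1); // 1 = last use of this slot
  }
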
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 8 ++-
llvm/lib/Target/AMDGPU/SIInstructions.td | 4 +-
.../AMDGPU/accvgpr-spill-scc-clobber.mir | 6 +-
.../CodeGen/AMDGPU/agpr-copy-reuse-writes.mir | 2 +-
.../AMDGPU/bb-prolog-spill-during-regalloc.ll | 14 ++---
.../AMDGPU/extend-wwm-virt-reg-liveness.mir | 8 +--
.../test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir | 2 +-
.../fastregalloc-self-loop-heuristic.mir | 10 ++--
.../CodeGen/AMDGPU/fold-restore-undef-use.mir | 3 +-
.../AMDGPU/greedy-global-heuristic.mir | 14 ++---
.../greedy-instruction-split-subrange.mir | 6 +-
.../AMDGPU/indirect-addressing-term.ll | 6 +-
...nfloop-subrange-spill-inspect-subrange.mir | 2 +-
llvm/test/CodeGen/AMDGPU/nsa-reassign.mir | 28 ++++-----
...al-regcopy-and-spill-missed-at-regalloc.ll | 4 +-
.../CodeGen/AMDGPU/pei-build-av-spill.mir | 48 +++++++--------
.../AMDGPU/pei-build-spill-partial-agpr.mir | 14 ++---
llvm/test/CodeGen/AMDGPU/pei-build-spill.mir | 36 +++++------
.../AMDGPU/pei-reg-scavenger-position.mir | 4 +-
llvm/test/CodeGen/AMDGPU/pr51516.mir | 2 +-
.../ra-inserted-scalar-instructions.mir | 44 +++++++-------
.../ran-out-of-sgprs-allocation-failure.mir | 4 +-
...-unsatisfiable-overlapping-tuple-hints.mir | 8 +--
.../test/CodeGen/AMDGPU/remat-dead-subreg.mir | 24 ++++----
llvm/test/CodeGen/AMDGPU/remat-smrd.mir | 28 ++++-----
llvm/test/CodeGen/AMDGPU/remat-sop.mir | 32 +++++-----
llvm/test/CodeGen/AMDGPU/remat-vop.mir | 60 +++++++++----------
.../sgpr-spill-dead-frame-in-dbg-value.mir | 2 +-
...fi-skip-processing-stack-arg-dbg-value.mir | 2 +-
.../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 2 +-
.../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir | 12 ++--
.../AMDGPU/sgpr-spill-vmem-large-frame.mir | 2 +-
.../AMDGPU/sgpr-spill-wrong-stack-id.mir | 20 +++----
llvm/test/CodeGen/AMDGPU/sgpr-spill.mir | 18 +++---
llvm/test/CodeGen/AMDGPU/spill-agpr.mir | 60 +++++++++----------
.../AMDGPU/spill-empty-live-interval.mir | 2 +-
.../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 12 ++--
.../CodeGen/AMDGPU/spill-special-sgpr.mir | 2 +-
.../CodeGen/AMDGPU/spill-to-agpr-partial.mir | 16 ++---
.../CodeGen/AMDGPU/spill-vector-superclass.ll | 2 +-
llvm/test/CodeGen/AMDGPU/spill192.mir | 6 +-
llvm/test/CodeGen/AMDGPU/spill224.mir | 6 +-
llvm/test/CodeGen/AMDGPU/spill288.mir | 6 +-
llvm/test/CodeGen/AMDGPU/spill320.mir | 6 +-
llvm/test/CodeGen/AMDGPU/spill352.mir | 6 +-
llvm/test/CodeGen/AMDGPU/spill384.mir | 6 +-
.../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 4 +-
.../AMDGPU/splitkit-copy-live-lanes.mir | 24 ++++----
.../AMDGPU/splitkit-nolivesubranges.mir | 2 +-
llvm/test/CodeGen/AMDGPU/splitkit.mir | 6 +-
.../stack-slot-color-sgpr-vgpr-spills.mir | 2 +-
.../unallocatable-bundle-regression.mir | 2 +-
.../AMDGPU/undefined-physreg-sgpr-spill.mir | 4 +-
.../AMDGPU/unexpected-reg-unit-state.mir | 2 +-
...tor-spill-restore-to-other-vector-type.mir | 16 ++---
.../vgpr-spill-dead-frame-in-dbg-value.mir | 2 +-
...fi-skip-processing-stack-arg-dbg-value.mir | 2 +-
.../CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir | 10 ++--
.../AMDGPU/wwm-spill-superclass-pseudo.mir | 2 +-
59 files changed, 344 insertions(+), 343 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 67992929ab3567..fe4113b68f0262 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1976,9 +1976,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
BuildMI(MBB, MI, DL, OpDesc, DestReg)
- .addFrameIndex(FrameIndex) // addr
- .addMemOperand(MMO)
- .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
+ .addFrameIndex(FrameIndex) // addr
+ .addMemOperand(MMO) // offset
+ .addImm(0) // last_use
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
return;
}
@@ -1989,6 +1990,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
+ .addImm(0) // last_use
.addMemOperand(MMO);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0f127276cfd1be..e6b61210ef1803 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -899,7 +899,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
def _RESTORE : PseudoInstSI <
(outs sgpr_class:$data),
- (ins i32imm:$addr)> {
+ (ins i32imm:$addr, i32imm:$last_use)> {
let mayStore = 0;
let mayLoad = 1;
}
@@ -968,7 +968,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {
def _RESTORE : VPseudoInstSI <
(outs vgpr_class:$vdata),
(ins i32imm:$vaddr,
- SReg_32:$soffset, i32imm:$offset)> {
+ SReg_32:$soffset, i32imm:$offset, i32imm:$last_use)> {
let mayStore = 0;
let mayLoad = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 53540e4a000492..12fd66b6806f5f 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -1007,7 +1007,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
+ $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -2027,7 +2027,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -3052,7 +3052,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
index 0d566efcbc91cf..16bb8a89b95af5 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
@@ -51,7 +51,7 @@ body: |
; GFX908-NEXT: S_ENDPGM 0
$agpr0_agpr1 = IMPLICIT_DEF
SI_SPILL_AV64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
$agpr2_agpr3 = COPY $agpr0_agpr1, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
index 3ed2cb856eaea8..1589d2dcf44467 100644
--- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
@@ -12,7 +12,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec
; REGALLOC-NEXT: renamable $sgpr6 = IMPLICIT_DEF
@@ -31,11 +31,11 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.1.Flow:
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
@@ -48,7 +48,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.2.bb.1:
; REGALLOC-NEXT: successors: %bb.4(0x80000000)
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 10
; REGALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, killed $sgpr4, 0, implicit $exec
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
@@ -57,18 +57,18 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.3.bb.2:
; REGALLOC-NEXT: successors: %bb.1(0x80000000)
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 20
; REGALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, killed $sgpr4, 0, implicit $exec
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; REGALLOC-NEXT: S_BRANCH %bb.1
; REGALLOC-NEXT: {{ $}}
; REGALLOC-NEXT: bb.4.bb.3:
- ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec
; REGALLOC-NEXT: KILL killed renamable $vgpr1
; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
index 3bf7e7b8c56960..75ad9f11e151fe 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
@@ -35,7 +35,7 @@ body: |
; GCN-NEXT: SI_RETURN
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
- renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
SI_RETURN
@@ -101,7 +101,7 @@ body: |
liveins: $sgpr6, $sgpr10_sgpr11
SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
- renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -190,7 +190,7 @@ body: |
liveins: $sgpr4, $sgpr10_sgpr11
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
- renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
S_BRANCH %bb.2
bb.2:
@@ -267,7 +267,7 @@ body: |
liveins: $sgpr4, $vgpr2_vgpr3
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
- renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr4 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
GLOBAL_STORE_DWORD $vgpr2_vgpr3, %0:vgpr_32, 0, 0, implicit $exec
SI_RETURN
diff --git a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
index 0fa6577b0dd03f..522e4b606de49b 100644
--- a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
@@ -50,7 +50,7 @@ body: |
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $vcc
; CHECK-NEXT: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK-NEXT: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: $vcc = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
%0:vgpr_32 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
index 24b82f4862afda..dccd2771c4cb11 100644
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
@@ -19,7 +19,7 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
@@ -58,7 +58,7 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
; GCN-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
@@ -104,7 +104,7 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec
@@ -144,7 +144,7 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, implicit $exec
; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
@@ -184,7 +184,7 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3
; GCN-NEXT: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
index 3616d617f84a0d..db1f6b60cff494 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir
@@ -51,7 +51,7 @@ body: |
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: {{ $}}
@@ -69,7 +69,6 @@ body: |
bb.2:
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; A restore for %0 should not be inserted here.
$sgpr6_sgpr7 = COPY undef %0
dead $sgpr30_sgpr31 = SI_CALL undef %1, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
index 6f1e5b89db8841..c4ee58999237a7 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
@@ -69,7 +69,7 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit %27
; CHECK-NEXT: S_NOP 0, implicit %35
; CHECK-NEXT: SI_SPILL_V128_SAVE %35, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE]]
; CHECK-NEXT: S_NOP 0, implicit %0
; CHECK-NEXT: S_NOP 0, implicit-def %10
@@ -139,22 +139,22 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit %29
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128 = COPY %27
; CHECK-NEXT: S_NOP 0, implicit %27
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]]
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE1]]
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE2]]
; CHECK-NEXT: S_NOP 0, implicit %0
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE3]]
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE4]]
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit %0
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE6]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY3]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY2]]
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
index edc04f2ef39ee8..eae58e41102610 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
@@ -25,7 +25,7 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
; CHECK-NEXT: S_NOP 0, implicit %9.sub1
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0
; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
; CHECK-NEXT: S_NOP 0, implicit %7.sub1
@@ -65,11 +65,11 @@ body: |
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: undef %13.sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0
; CHECK-NEXT: S_NOP 0, implicit-def %13.sub1
; CHECK-NEXT: undef %15.sub0:vreg_64 = COPY %13.sub0
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: undef %7.sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1
; CHECK-NEXT: S_NOP 0, implicit-def %7.sub0
; CHECK-NEXT: undef %9.sub1:vreg_64 = COPY %7.sub1
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 08f03c22683f92..8db642b756841f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -80,7 +80,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: dead [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec
@@ -95,11 +95,11 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: bb.3:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]], killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 7864564d289178..82d79c6b567b94 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -58,7 +58,7 @@ body: |
; CHECK-NEXT: SI_SPILL_S512_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF
; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_2:%[0-9]+]].sub0:vreg_96 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
- ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], undef renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
; CHECK-NEXT: S_BRANCH %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
index aa6b0ee477abad..f1c83cd99c8543 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
@@ -27,13 +27,13 @@ registers:
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
body: |
bb.0:
- %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7)
S_ENDPGM 0, implicit %7
...
@@ -58,13 +58,13 @@ registers:
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
body: |
bb.0:
- %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
- %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 7)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 75da11bf9a0963..7410b4b9ae7b90 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -22,7 +22,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
@@ -70,7 +70,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
index a9580da5af1da1..c764bc5ec72a72 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
@@ -69,7 +69,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0 = IMPLICIT_DEF
SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5)
- $vgpr0 = SI_SPILL_AV32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ $vgpr0 = SI_SPILL_AV32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -147,7 +147,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_AV64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -237,7 +237,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_AV96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2 = SI_SPILL_AV96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2 = SI_SPILL_AV96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -339,7 +339,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_AV128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -457,7 +457,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
SI_SPILL_AV160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_AV160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_AV160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -587,7 +587,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
SI_SPILL_AV192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_AV192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_AV192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -729,7 +729,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
SI_SPILL_AV224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_AV224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_AV224_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -883,7 +883,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
SI_SPILL_AV256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_AV256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_AV256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1141,7 +1141,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
SI_SPILL_AV512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1607,7 +1607,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
SI_SPILL_AV1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_AV1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_AV1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1677,7 +1677,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0 = IMPLICIT_DEF
SI_SPILL_AV32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5)
- $agpr0 = SI_SPILL_AV32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ $agpr0 = SI_SPILL_AV32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1765,7 +1765,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1 = IMPLICIT_DEF
SI_SPILL_AV64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1871,7 +1871,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2 = IMPLICIT_DEF
SI_SPILL_AV96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2 = SI_SPILL_AV96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2 = SI_SPILL_AV96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1995,7 +1995,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
SI_SPILL_AV128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_AV128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2139,7 +2139,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
SI_SPILL_AV160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_AV160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_AV160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2301,7 +2301,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
SI_SPILL_AV192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_AV192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2481,7 +2481,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
SI_SPILL_AV224_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = SI_SPILL_AV224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = SI_SPILL_AV224_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2679,7 +2679,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
SI_SPILL_AV256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_AV256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_AV256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2897,7 +2897,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
SI_SPILL_AV288_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, %stack.0, $sgpr32, 0, implicit $exec :: (store (s288) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_AV288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_AV288_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -3133,7 +3133,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
SI_SPILL_AV320_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, %stack.0, $sgpr32, 0, implicit $exec :: (store (s320) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_AV320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_AV320_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -3387,7 +3387,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
SI_SPILL_AV352_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, %stack.0, $sgpr32, 0, implicit $exec :: (store (s352) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_AV352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_AV352_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -3659,7 +3659,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
SI_SPILL_AV384_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, %stack.0, $sgpr32, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_AV384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_AV384_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -4005,7 +4005,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
SI_SPILL_AV512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -4647,7 +4647,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
SI_SPILL_AV1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_AV1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_AV1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index ec50b6d548896a..090581cbebc09a 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -76,7 +76,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -114,7 +114,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -158,7 +158,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -204,7 +204,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -256,7 +256,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -310,7 +310,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -386,7 +386,7 @@ body: |
; FLATSCR-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
index bd052daff5cd42..807991e9ef0ab3 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
@@ -69,7 +69,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0 = IMPLICIT_DEF
SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5)
- $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -147,7 +147,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -237,7 +237,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -339,7 +339,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -457,7 +457,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -587,7 +587,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -741,7 +741,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -999,7 +999,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1465,7 +1465,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
SI_SPILL_V1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1535,7 +1535,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0 = IMPLICIT_DEF
SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5)
- $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1623,7 +1623,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1 = IMPLICIT_DEF
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1729,7 +1729,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2 = IMPLICIT_DEF
SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1853,7 +1853,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -1997,7 +1997,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2159,7 +2159,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2357,7 +2357,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -2703,7 +2703,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -3345,7 +3345,7 @@ body: |
; FLATSCR-GFX90A-V2A-NEXT: S_ENDPGM 0
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index aa4428f3da4eb9..b8dcfdd5cad85d 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -40,10 +40,10 @@ body: |
; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
bb.0:
- $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
S_BRANCH %bb.1
bb.1:
- $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
S_ENDPGM 0, implicit $vgpr0
...
diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir
index b21285e83dc21d..1355d03f0cb406 100644
--- a/llvm/test/CodeGen/AMDGPU/pr51516.mir
+++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir
@@ -4,7 +4,7 @@
# is killed by that store.
# GCN-LABEL: name: global_sextload_v32i32_to_v32i64
-# GCN: renamable $vgpr33_vgpr34_vgpr35_vgpr36 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+# GCN: renamable $vgpr33_vgpr34_vgpr35_vgpr36 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
# GCN: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr47, killed renamable $vgpr29_vgpr30_vgpr31_vgpr32, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec, implicit killed renamable $vgpr46
---
diff --git a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
index dca9ffad7e800c..d3162df93055d7 100644
--- a/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir
@@ -218,7 +218,7 @@ body: |
; GCN-NEXT: SI_SPILL_S512_SAVE [[S_LOAD_DWORDX16_IMM1]], %stack.12, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.12, align 4, addrspace 5)
; GCN-NEXT: [[S_LOAD_DWORDX8_IMM7:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[V_READFIRSTLANE_B32_]], 156, 0 :: ("amdgpu-noclobber" load (s256), align 8, addrspace 1)
; GCN-NEXT: SI_SPILL_S256_SAVE [[S_LOAD_DWORDX8_IMM7]], %stack.8, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.8, align 4, addrspace 5)
- ; GCN-NEXT: [[SI_SPILL_S64_RESTORE:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.19, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.19, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S64_RESTORE:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.19, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.19, align 4, addrspace 5)
; GCN-NEXT: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM [[SI_SPILL_S64_RESTORE]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1)
; GCN-NEXT: SI_SPILL_S32_SAVE [[S_LOAD_DWORD_IMM3]], %stack.7, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.7, addrspace 5)
; GCN-NEXT: SI_SPILL_S64_SAVE [[V_READFIRSTLANE_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -226,7 +226,7 @@ body: |
; GCN-NEXT: dead [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY1]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1)
; GCN-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GCN-NEXT: [[S_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_MOV_B64_1]], 0, 0 :: ("amdgpu-noclobber" load (s32), addrspace 1)
- ; GCN-NEXT: [[SI_SPILL_S64_RESTORE1:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S64_RESTORE1:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY2:%[0-9]+]].sub1:sgpr_64 = COPY [[SI_SPILL_S64_RESTORE1]].sub1
; GCN-NEXT: [[COPY2:%[0-9]+]].sub0:sgpr_64 = S_MOV_B32 1
; GCN-NEXT: S_CBRANCH_SCC1 %bb.10, implicit undef $scc
@@ -253,21 +253,21 @@ body: |
; GCN-NEXT: successors: %bb.10(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = COPY [[S_MOV_B32_4]]
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.10:
; GCN-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORD_IMM2]], 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.17, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.17, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.17, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.17, addrspace 5)
; GCN-NEXT: dead [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_]], [[SI_SPILL_S32_RESTORE]], implicit-def dead $scc
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.15, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.15, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.15, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.15, addrspace 5)
; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE1]], 0, implicit-def $scc
; GCN-NEXT: dead [[DEF4:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; GCN-NEXT: [[SI_SPILL_S64_RESTORE2:%[0-9]+]]:sreg_64_xexec = SI_SPILL_S64_RESTORE %stack.18, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.18, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S64_RESTORE2:%[0-9]+]]:sreg_64_xexec = SI_SPILL_S64_RESTORE %stack.18, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.18, align 4, addrspace 5)
; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S64_RESTORE2]].sub1, 0, implicit-def $scc
; GCN-NEXT: dead [[DEF5:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.20, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.20, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.20, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.20, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY3:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub0 {
; GCN-NEXT: internal [[COPY3]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub2
; GCN-NEXT: internal [[COPY3]].sub4:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE]].sub4
@@ -284,7 +284,7 @@ body: |
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_7:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub4, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_8:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub5, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_9:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[S_LOAD_DWORDX8_IMM]].sub6, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.14, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.14, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.14, 0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.14, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY4:%[0-9]+]].sub0_sub1_sub2:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub0_sub1_sub2
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_10:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY4]].sub0, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_11:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY4]].sub1, 0, implicit $mode, implicit $exec
@@ -297,7 +297,7 @@ body: |
; GCN-NEXT: dead [[DEF10:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: dead [[DEF11:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: dead [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF11]], undef [[DEF11]], implicit-def dead $scc
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE1:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.16, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.16, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE1:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.16, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.16, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY5:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub0 {
; GCN-NEXT: internal [[COPY5]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub2
; GCN-NEXT: internal [[COPY5]].sub5:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE1]].sub5
@@ -307,19 +307,19 @@ body: |
; GCN-NEXT: dead [[S_AND_B32_3:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[V_CMP_GT_F32_e64_8]], undef [[V_CMP_GT_F32_e64_9]], implicit-def dead $scc
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_14:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY5]].sub2, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY5]].sub5, [[COPY5]].sub7, implicit-def dead $scc
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE2:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.10, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.10, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE2:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.10, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.10, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY6:%[0-9]+]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE2]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16
; GCN-NEXT: dead [[S_OR_B32_3:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY6]].sub0, [[COPY6]].sub1, implicit-def dead $scc
; GCN-NEXT: dead [[S_OR_B32_4:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY6]].sub2, undef [[S_OR_B32_3]], implicit-def dead $scc
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE2:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.9, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.9, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE2:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.9, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.9, addrspace 5)
; GCN-NEXT: dead [[S_AND_B32_4:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_3]], [[SI_SPILL_S32_RESTORE2]], implicit-def dead $scc
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_15:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub3, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_16:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub4, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_17:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub5, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_18:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY6]].sub6, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE3:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.11, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.11, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE3:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.11, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.11, addrspace 5)
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_19:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE3]], 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE3:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.13, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.13, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE3:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.13, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.13, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY7:%[0-9]+]].sub0:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub0 {
; GCN-NEXT: internal [[COPY7]].sub2:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub2
; GCN-NEXT: internal [[COPY7]].sub4:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE3]].sub4
@@ -348,7 +348,7 @@ body: |
; GCN-NEXT: dead [[S_OR_B32_5:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY8]].sub10, [[COPY8]].sub9, implicit-def dead $scc
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_27:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub13, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_28:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY8]].sub14, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.12, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.12, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.12, 0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.12, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY9:%[0-9]+]].sub1:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub1 {
; GCN-NEXT: internal [[COPY9]].sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub5
; GCN-NEXT: internal [[COPY9]].sub6:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub6
@@ -367,7 +367,7 @@ body: |
; GCN-NEXT: dead [[DEF15:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: dead [[S_AND_B32_7:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF15]], undef [[DEF14]], implicit-def dead $scc
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_33:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY9]].sub12, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE4:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.6, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.6, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE4:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.6, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.6, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY10:%[0-9]+]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE4]].lo16_hi16_sub1_lo16_sub1_hi16_sub2_lo16_sub2_hi16_sub3_lo16_sub3_hi16_sub4_lo16_sub4_hi16_sub5_lo16_sub5_hi16_sub6_lo16_sub6_hi16
; GCN-NEXT: dead [[S_OR_B32_6:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY10]].sub0, [[COPY9]].sub15, implicit-def dead $scc
; GCN-NEXT: dead [[DEF16:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
@@ -380,22 +380,22 @@ body: |
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_38:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub5, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_39:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY10]].sub6, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[S_AND_B32_8:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[DEF18]], undef [[DEF17]], implicit-def dead $scc
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE5:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.4, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE5:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.4, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.4, align 4, addrspace 5)
; GCN-NEXT: undef [[COPY11:%[0-9]+]].sub0_sub1_sub2_sub3_sub4_sub5:sgpr_256 = COPY [[SI_SPILL_S256_RESTORE5]].sub0_sub1_sub2_sub3_sub4_sub5
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_40:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub0, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_41:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub1, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE4:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE4:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.3, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_42:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE4]], 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_43:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub2, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_44:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[COPY11]].sub3, 0, implicit $mode, implicit $exec
; GCN-NEXT: dead [[S_OR_B32_7:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY11]].sub4, [[COPY11]].sub5, implicit-def dead $scc
; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S32_RESTORE4]], 0, implicit-def $scc
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE5:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE5:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.5, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.5, addrspace 5)
; GCN-NEXT: dead [[S_AND_B32_9:%[0-9]+]]:sreg_32 = S_AND_B32 undef [[S_OR_B32_7]], [[SI_SPILL_S32_RESTORE5]], implicit-def dead $scc
; GCN-NEXT: dead [[S_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY2]], 0, 0 :: ("amdgpu-noclobber" load (s32), align 8, addrspace 1)
- ; GCN-NEXT: [[SI_SPILL_S256_RESTORE6:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.8, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.8, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S256_RESTORE6:%[0-9]+]]:sgpr_256 = SI_SPILL_S256_RESTORE %stack.8, 0, implicit $exec, implicit $sgpr32 :: (load (s256) from %stack.8, align 4, addrspace 5)
; GCN-NEXT: S_CMP_EQ_U32 [[SI_SPILL_S256_RESTORE6]].sub7, 0, implicit-def $scc
- ; GCN-NEXT: [[SI_SPILL_S32_RESTORE6:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.7, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.7, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S32_RESTORE6:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.7, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.7, addrspace 5)
; GCN-NEXT: dead [[V_CMP_GT_F32_e64_45:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, 0, 0, [[SI_SPILL_S32_RESTORE6]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[DEF19:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: dead [[S_AND_B32_10:%[0-9]+]]:sreg_32 = S_AND_B32 [[DEF19]], undef [[S_LOAD_DWORD_IMM6]], implicit-def dead $scc
@@ -409,9 +409,9 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.12:
- ; GCN-NEXT: [[SI_SPILL_S64_RESTORE3:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S64_RESTORE3:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE3]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; GCN-NEXT: [[SI_SPILL_S64_RESTORE4:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_S64_RESTORE4:%[0-9]+]]:sgpr_64 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], [[SI_SPILL_S64_RESTORE4]], 0, 0, implicit $exec :: (store (s32), addrspace 1)
; GCN-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index 2b613cbbabeee7..a02d7ff2424052 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -96,7 +96,7 @@ body: |
; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr60
; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr60
; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr60
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
@@ -118,7 +118,7 @@ body: |
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr60
; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr62
- ; CHECK-NEXT: $sgpr14 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: $sgpr14 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_noregs, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: S_BRANCH %bb.17
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
index 09be927dc952e2..e774e4ddbe724b 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
@@ -49,8 +49,8 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit-def %17
; CHECK-NEXT: SI_SPILL_V256_SAVE %17, %stack.2, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit-def %4
- ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE1:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.3, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE]], implicit [[SI_SPILL_V256_RESTORE1]], implicit %4
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[SI_SPILL_V256_RESTORE1]]
; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@@ -59,9 +59,9 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
- ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE2:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE2]]
- ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE3:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V256_RESTORE3]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir b/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir
index a9e49456a7d907..27be40100cb8db 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir
@@ -16,9 +16,9 @@ body: |
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 2, implicit $m0
; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 3, implicit $m0
- ; GCN-NEXT: dead %4:vgpr_32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr2, implicit killed $sgpr1
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: dead %5:vgpr_32 = V_MOV_B32_e32 killed $sgpr1, implicit $exec
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr2, implicit killed $sgpr1
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 killed $sgpr1, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
$m0 = IMPLICIT_DEF
%0:sreg_64_xexec = S_MOV_B64 1, implicit $m0
@@ -40,10 +40,10 @@ body: |
; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 2, implicit $m0
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr2, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $sgpr2 = S_MOV_B32 3, implicit $m0
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: dead %4:vgpr_32 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit killed $sgpr0, implicit killed $sgpr2
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit killed $sgpr0, implicit killed $sgpr2
; GCN-NEXT: renamable $sgpr0 = S_MUL_I32 renamable $sgpr5, 3
- ; GCN-NEXT: dead %5:vgpr_32 = V_MOV_B32_e32 killed $sgpr0, implicit $exec
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 killed $sgpr0, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr5
$m0 = IMPLICIT_DEF
%0:sreg_64_xexec = S_MOV_B64 1, implicit $m0
@@ -66,9 +66,9 @@ body: |
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 3, implicit $m0
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 2, implicit $m0
- ; GCN-NEXT: dead %5:vgpr_32 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit killed $sgpr0_sgpr1, implicit killed $sgpr4_sgpr5
- ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; GCN-NEXT: dead %6:vreg_64 = V_MOV_B64_PSEUDO killed $sgpr4_sgpr5, implicit $exec
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit killed $sgpr0_sgpr1, implicit killed $sgpr4_sgpr5
+ ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: dead [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO killed $sgpr4_sgpr5, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr8, implicit renamable $sgpr11
%0:sreg_64 = IMPLICIT_DEF
%1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 1, 0
@@ -91,9 +91,9 @@ body: |
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 2, implicit $m0
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_MOV_B64 3, implicit $m0
- ; GCN-NEXT: dead %4:vgpr_32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr4_sgpr5, implicit killed $sgpr2_sgpr3
- ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; GCN-NEXT: dead %5:vreg_64 = V_MOV_B64_PSEUDO killed $sgpr2_sgpr3, implicit $exec
+ ; GCN-NEXT: dead [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr4_sgpr5, implicit killed $sgpr2_sgpr3
+ ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: dead [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO killed $sgpr2_sgpr3, implicit $exec
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
$m0 = IMPLICIT_DEF
%0:sreg_64_xexec = S_MOV_B64 1, implicit $m0
diff --git a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir
index 95eac12a65389e..e0c0bb95e08f36 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-smrd.mir
@@ -268,10 +268,10 @@ body: |
; GCN-NEXT: renamable $sgpr1 = S_GET_WAVEID_IN_WORKGROUP
; GCN-NEXT: renamable $sgpr0 = S_GET_WAVEID_IN_WORKGROUP
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
@@ -358,10 +358,10 @@ body: |
; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM_ci renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable invariant load (s32), addrspace 4)
; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM_ci renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable invariant load (s32), addrspace 4)
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
@@ -475,10 +475,10 @@ body: |
; GCN-NEXT: renamable $sgpr1 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 4, 0 :: (dereferenceable load (s32), addrspace 4)
; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM renamable $sgpr2_sgpr3, 8, 0 :: (dereferenceable load (s32), addrspace 4)
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr2_sgpr3
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
@@ -507,11 +507,11 @@ body: |
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MEMTIME
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
%1:sreg_64_xexec = S_MEMTIME
@@ -539,11 +539,11 @@ body: |
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MEMREALTIME
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $sgpr0_sgpr1
%0:sreg_64_xexec = COPY $sgpr8_sgpr9
%1:sreg_64_xexec = S_MEMREALTIME
@@ -570,9 +570,9 @@ body: |
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s64), addrspace 4)
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr0_sgpr1, 8, 0 :: (dereferenceable invariant load (s64), addrspace 4)
- ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
- ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
undef %0.sub0_sub1:sgpr_256 = COPY $sgpr8_sgpr9
diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
index e41c42c4f40b82..f8cd4029ace537 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -36,10 +36,10 @@ body: |
; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 2, implicit $exec
; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 3, implicit $exec
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_ENDPGM 0
$exec = IMPLICIT_DEF
@@ -96,9 +96,9 @@ body: |
; GCN-NEXT: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr1
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0
; GCN-NEXT: S_ENDPGM 0
@@ -616,23 +616,23 @@ body: |
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.5, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.5, addrspace 5)
; GCN-NEXT: renamable $sgpr0 = COPY killed renamable $sgpr1
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.4, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.4, addrspace 5)
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.3, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def $scc
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.3, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
- ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
- ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, 0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
; GCN-NEXT: S_ENDPGM 0
%0:sreg_64 = S_GETPC_B64
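
Note on the hunks above: each makes the same mechanical change, appending a new immediate (printed as the extra 0 before "implicit $exec") after the frame index of every SGPR restore. A minimal sketch of flipping that flag, assuming the explicit-operand layout visible in the check lines (0 = defined register, 1 = frame index, 2 = the new last_use immediate); the helper name is hypothetical and this is not code from the patch:

  #include "llvm/CodeGen/MachineInstr.h"

  // Sketch: mark an SGPR restore as the last use of its stack slot.
  // Explicit operands per the MIR above: 0 = defined register,
  // 1 = frame index, 2 = the new last_use immediate (0 by default).
  static void markRestoreAsLastUse(llvm::MachineInstr &MI) {
    MI.getOperand(2).setImm(1);
  }

A real caller would first check the opcode, for example against AMDGPU::SI_SPILL_S32_RESTORE, before indexing into the operands.
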
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb6360..bcab6a47b18e6c 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -36,10 +36,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
$m0 = IMPLICIT_DEF
@@ -110,10 +110,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
@@ -221,10 +221,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
@@ -250,10 +250,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
$mode = IMPLICIT_DEF
@@ -322,10 +322,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
@@ -349,10 +349,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
@@ -376,10 +376,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
@@ -552,10 +552,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0)
@@ -1377,10 +1377,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
@@ -1404,10 +1404,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
@@ -1431,10 +1431,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
@@ -1563,10 +1563,10 @@ body: |
; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
@@ -5070,11 +5070,11 @@ body: |
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
%0:vgpr_32 = COPY $vgpr0
@@ -5291,11 +5291,11 @@ body: |
; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
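
On the VGPR restores above (and the AGPR restores later in the patch) the flag lands later in the operand list: SI_SPILL_V32_RESTORE already carries a scratch-offset SGPR and an offset immediate, so the new last_use immediate is the second of the two zeros in "$sgpr32, 0, 0". A minimal sketch of reading it back, with the operand index inferred from the check lines and a hypothetical helper name:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"

  // Sketch: test whether a VGPR restore is flagged as a last use.
  // Explicit operands per the MIR above: 0 = defined register,
  // 1 = frame index, 2 = scratch SGPR, 3 = offset, 4 = last_use.
  static bool isLastUseRestore(const llvm::MachineInstr &MI) {
    const llvm::MachineOperand &Op = MI.getOperand(4);
    return Op.isImm() && Op.getImm() != 0;
  }
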
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
index a7c841d54f7c97..bc88acbd9f6503 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
@@ -73,5 +73,5 @@ body: |
DBG_VALUE %stack.0, 0, !1, !8, debug-location !9
bb.1:
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
index a6cb7d4af7641c..0b3b51bd4dd906 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
@@ -52,5 +52,5 @@ body: |
DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9
bb.1:
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 1473e667f894cd..e4566e724f2bfd 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -220,7 +220,7 @@ body: |
KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
bb.2:
- renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+ renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
bb.3:
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 2c4b7a22facf43..d3fb516e409f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -143,7 +143,7 @@ body: |
bb.0:
liveins:
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, align 4, addrspace 5)
+ $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -192,7 +192,7 @@ body: |
bb.0:
liveins:
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $sgpr8_sgpr9 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $sgpr8_sgpr9 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -299,7 +299,7 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.1, align 4, addrspace 5)
+ $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -410,7 +410,7 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $sgpr8_sgpr9 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32:: (load (s64) from %stack.1, align 4, addrspace 5)
+ $sgpr8_sgpr9 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32:: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -543,8 +543,8 @@ body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.1, align 4, addrspace 5)
- $sgpr9 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.2, align 4, addrspace 5)
+ $sgpr8 = SI_SPILL_S32_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.1, align 4, addrspace 5)
+ $sgpr9 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32:: (load (s32) from %stack.2, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index cac9c85130a7b4..0790b03db0af9f 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -43,6 +43,6 @@ body: |
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
S_CMP_EQ_U32 0, 0, implicit-def $scc
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
...
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
index efbdbca9da6b7f..6e5248839a0b0c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
@@ -39,13 +39,13 @@
# SHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
-# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
-# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
-# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit $vgpr0
-# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
# NOSHARE: stack:
# NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
@@ -64,14 +64,14 @@
# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5)
# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5)
-# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit undef $vgpr0
-# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
+# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5)
# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5)
-# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
+# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5)
# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu, implicit $vgpr0
-# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
+# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5)
...
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index 059eb6dad2c313..9f46b80d206a6d 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -1023,21 +1023,21 @@ body: |
; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5)
; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
- renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13_sgpr14 = SI_SPILL_S96_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13_sgpr14 = SI_SPILL_S96_RESTORE %stack.2, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S128_RESTORE %stack.3, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = SI_SPILL_S160_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = SI_SPILL_S160_RESTORE %stack.4, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S256_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = SI_SPILL_S256_RESTORE %stack.5, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.6, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.7, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.8, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.8, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index a0fa4efdd1583b..9674f8f3f05060 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -27,8 +27,8 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GFX908-SPILLED-NEXT: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr32
; GFX908-EXPANDED: bb.0:
@@ -69,8 +69,8 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GFX90A-SPILLED-NEXT: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr32
; GFX90A-EXPANDED: bb.0:
@@ -128,7 +128,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr64
; GFX908-EXPANDED: bb.0:
@@ -166,7 +166,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr64
; GFX90A-EXPANDED: bb.0:
@@ -222,7 +222,7 @@ body: |
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; GFX908-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GFX908-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -288,7 +288,7 @@ body: |
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -387,7 +387,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr96
; GFX908-EXPANDED: bb.0:
@@ -427,7 +427,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr96
; GFX90A-EXPANDED: bb.0:
@@ -485,7 +485,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr128
; GFX908-EXPANDED: bb.0:
@@ -527,7 +527,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr128
; GFX90A-EXPANDED: bb.0:
@@ -587,7 +587,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr160
; GFX908-EXPANDED: bb.0:
@@ -631,7 +631,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr160
; GFX90A-EXPANDED: bb.0:
@@ -693,7 +693,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr192
; GFX908-EXPANDED: bb.0:
@@ -739,7 +739,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr192
; GFX90A-EXPANDED: bb.0:
@@ -803,7 +803,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr256
; GFX908-EXPANDED: bb.0:
@@ -853,7 +853,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr256
; GFX90A-EXPANDED: bb.0:
@@ -921,7 +921,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_A288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_A288_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr288
; GFX908-EXPANDED: bb.0:
@@ -973,7 +973,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_A288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = SI_SPILL_A288_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr288
; GFX90A-EXPANDED: bb.0:
@@ -1043,7 +1043,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_A320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_A320_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr320
; GFX908-EXPANDED: bb.0:
@@ -1097,7 +1097,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_A320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = SI_SPILL_A320_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr320
; GFX90A-EXPANDED: bb.0:
@@ -1169,7 +1169,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_A352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_A352_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr352
; GFX908-EXPANDED: bb.0:
@@ -1225,7 +1225,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_A352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = SI_SPILL_A352_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr352
; GFX90A-EXPANDED: bb.0:
@@ -1299,7 +1299,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_A384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_A384_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr384
; GFX908-EXPANDED: bb.0:
@@ -1357,7 +1357,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_A384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = SI_SPILL_A384_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr384
; GFX90A-EXPANDED: bb.0:
@@ -1433,7 +1433,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr512
; GFX908-EXPANDED: bb.0:
@@ -1499,7 +1499,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr512
; GFX90A-EXPANDED: bb.0:
@@ -1583,7 +1583,7 @@ body: |
; GFX908-SPILLED-NEXT: S_NOP 1
; GFX908-SPILLED-NEXT: {{ $}}
; GFX908-SPILLED-NEXT: bb.2:
- ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; GFX908-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
; GFX908-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; GFX908-EXPANDED-LABEL: name: spill_restore_agpr1024
; GFX908-EXPANDED: bb.0:
@@ -1681,7 +1681,7 @@ body: |
; GFX90A-SPILLED-NEXT: S_NOP 1
; GFX90A-SPILLED-NEXT: {{ $}}
; GFX90A-SPILLED-NEXT: bb.2:
- ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; GFX90A-SPILLED-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5)
; GFX90A-SPILLED-NEXT: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr1024
; GFX90A-EXPANDED: bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index 9bac6bbd975957..6462a8f575be69 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -27,7 +27,7 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: undef %6.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit %6.sub1
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub1
; CHECK-NEXT: S_NOP 0, implicit undef %9.sub0:vreg_64
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index efb022ccb0d556..51eda8dc9536cf 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -28,7 +28,7 @@ body: |
; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31
S_NOP 0
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_SETPC_B64 $sgpr30_sgpr31
...

@@ -161,8 +161,8 @@ body: |
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
- renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_SETPC_B64 $sgpr30_sgpr31
...

@@ -230,7 +230,7 @@ body: |
S_BRANCH %bb.3
bb.3:
liveins: $sgpr10, $sgpr30_sgpr31
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr10
...

@@ -297,12 +297,12 @@ body: |
S_BRANCH %bb.3
bb.1:
liveins: $sgpr10, $sgpr30_sgpr31
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
$sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
S_BRANCH %bb.2
bb.2:
liveins: $sgpr10, $sgpr30_sgpr31
- renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
$sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
S_BRANCH %bb.3
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 3892ceb418959f..2879e902b5a7a2 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -147,5 +147,5 @@ body: |
$vcc = IMPLICIT_DEF
SI_SPILL_S64_SAVE killed $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
- $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+ $vcc = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
index 52593e01eafdeb..1639b17df647bf 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
@@ -22,7 +22,7 @@ body: |
; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30
...

@@ -49,7 +49,7 @@ body: |
; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29
...

@@ -78,7 +78,7 @@ body: |
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28
...

@@ -107,7 +107,7 @@ body: |
; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; GCN-NEXT: S_ENDPGM 0
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...

@@ -132,7 +132,7 @@ body: |
; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54
...

@@ -159,7 +159,7 @@ body: |
; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53
...

@@ -188,7 +188,7 @@ body: |
; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52
...

@@ -217,6 +217,6 @@ body: |
; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
; GCN-NEXT: S_ENDPGM 0
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index d5f97314f9324c..32e6bc16f71ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -16,7 +16,7 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN-NEXT: SI_SPILL_V64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
- ; GCN-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, [[SI_SPILL_V64_RESTORE]]
; GCN-NEXT: S_ENDPGM 0
%v0 = call i32 asm sideeffect "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 9371a72d6992b5..4871de52f54706 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -27,7 +27,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s192) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s192) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
;
; EXPANDED-LABEL: name: spill_restore_sgpr192
@@ -89,7 +89,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
;
; EXPANDED-LABEL: name: spill_restore_vgpr192
@@ -106,7 +106,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
bb.0:
S_NOP 0, implicit-def %0:vreg_192
diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
index 2b63abe34eed69..41526ee9982b96 100644
--- a/llvm/test/CodeGen/AMDGPU/spill224.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -25,7 +25,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s224) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s224) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
;
; EXPANDED-LABEL: name: spill_restore_sgpr224
@@ -89,7 +89,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
;
; EXPANDED-LABEL: name: spill_restore_vgpr224
@@ -106,7 +106,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
bb.0:
S_NOP 0, implicit-def %0:vreg_224
diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir
index 29df0b16c1eaa8..a5391da13d3b46 100644
--- a/llvm/test/CodeGen/AMDGPU/spill288.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill288.mir
@@ -25,7 +25,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = SI_SPILL_S288_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s288) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = SI_SPILL_S288_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s288) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
;
; EXPANDED-LABEL: name: spill_restore_sgpr288
@@ -93,7 +93,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
;
; EXPANDED-LABEL: name: spill_restore_vgpr288
@@ -110,7 +110,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = SI_SPILL_V288_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s288) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
bb.0:
S_NOP 0, implicit-def %0:vreg_288
diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir
index d7ded7d8f01b48..ecc347c0c223d8 100644
--- a/llvm/test/CodeGen/AMDGPU/spill320.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill320.mir
@@ -25,7 +25,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = SI_SPILL_S320_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s320) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = SI_SPILL_S320_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s320) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
;
; EXPANDED-LABEL: name: spill_restore_sgpr320
@@ -95,7 +95,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
;
; EXPANDED-LABEL: name: spill_restore_vgpr320
@@ -112,7 +112,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = SI_SPILL_V320_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s320) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
bb.0:
S_NOP 0, implicit-def %0:vreg_320
diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir
index fc8e72eca05ff3..6a11e3123e89a1 100644
--- a/llvm/test/CodeGen/AMDGPU/spill352.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill352.mir
@@ -25,7 +25,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 = SI_SPILL_S352_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s352) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 = SI_SPILL_S352_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s352) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
;
; EXPANDED-LABEL: name: spill_restore_sgpr352
@@ -97,7 +97,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
;
; EXPANDED-LABEL: name: spill_restore_vgpr352
@@ -114,7 +114,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = SI_SPILL_V352_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s352) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10
bb.0:
S_NOP 0, implicit-def %0:vreg_352
diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir
index 3d6fe33f98950f..e50e7bc6cdc622 100644
--- a/llvm/test/CodeGen/AMDGPU/spill384.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill384.mir
@@ -25,7 +25,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S384_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = SI_SPILL_S384_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s384) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
;
; EXPANDED-LABEL: name: spill_restore_sgpr384
@@ -99,7 +99,7 @@ body: |
; SPILLED-NEXT: S_NOP 1
; SPILLED-NEXT: {{ $}}
; SPILLED-NEXT: bb.2:
- ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; SPILLED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
;
; EXPANDED-LABEL: name: spill_restore_vgpr384
@@ -116,7 +116,7 @@ body: |
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
- ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; EXPANDED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
bb.0:
S_NOP 0, implicit-def %0:vreg_384
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
index c318502b8a32d1..49c0ef43a339e2 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -281,7 +281,7 @@ body: |
; RA-NEXT: }
; RA-NEXT: SI_SPILL_S512_SAVE %18, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; RA-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
- ; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
+ ; RA-NEXT: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; RA-NEXT: undef %17.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 {
; RA-NEXT: internal %17.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11
; RA-NEXT: internal %17.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7
@@ -318,7 +318,7 @@ body: |
; VR-NEXT: renamable $sgpr26 = S_MOV_B32 -1
; VR-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
; VR-NEXT: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98
- ; VR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
+ ; VR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
; VR-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17
; VR-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr19
; VR-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
index 71f4a4c1dedffa..b32d439f78c363 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir
@@ -312,7 +312,7 @@ body: |
; CHECK-NEXT: %134.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %134.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5)
; CHECK-NEXT: undef %131.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 {
; CHECK-NEXT: internal %131.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2
; CHECK-NEXT: }
@@ -331,7 +331,7 @@ body: |
; CHECK-NEXT: %122.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %122.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %122, %2, 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5)
; CHECK-NEXT: undef %190.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 {
; CHECK-NEXT: internal %190.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2
; CHECK-NEXT: }
@@ -341,7 +341,7 @@ body: |
; CHECK-NEXT: %120.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %120.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %120, %2, 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5)
; CHECK-NEXT: undef %211.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 {
; CHECK-NEXT: internal %211.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2
; CHECK-NEXT: }
@@ -351,7 +351,7 @@ body: |
; CHECK-NEXT: %208.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %208.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %208, %2, 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5)
; CHECK-NEXT: undef %178.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 {
; CHECK-NEXT: internal %178.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2
; CHECK-NEXT: }
@@ -379,7 +379,7 @@ body: |
; CHECK-NEXT: %106.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %106.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %106, %2, 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
; CHECK-NEXT: undef %103.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 {
; CHECK-NEXT: internal %103.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2
; CHECK-NEXT: }
@@ -389,7 +389,7 @@ body: |
; CHECK-NEXT: %100.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %100.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %100, %2, 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5)
; CHECK-NEXT: undef %97.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 {
; CHECK-NEXT: internal %97.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2
; CHECK-NEXT: }
@@ -399,7 +399,7 @@ body: |
; CHECK-NEXT: %94.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %94.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5)
; CHECK-NEXT: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 {
; CHECK-NEXT: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2
; CHECK-NEXT: }
@@ -409,7 +409,7 @@ body: |
; CHECK-NEXT: %86.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %86.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %86, %2, 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
; CHECK-NEXT: undef %81.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 {
; CHECK-NEXT: internal %81.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2
; CHECK-NEXT: }
@@ -419,7 +419,7 @@ body: |
; CHECK-NEXT: %78.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %78.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %78, %2, 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
; CHECK-NEXT: undef %73.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 {
; CHECK-NEXT: internal %73.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2
; CHECK-NEXT: }
@@ -429,7 +429,7 @@ body: |
; CHECK-NEXT: %70.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %70.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
; CHECK-NEXT: undef %65.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 {
; CHECK-NEXT: internal %65.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2
; CHECK-NEXT: }
@@ -439,7 +439,7 @@ body: |
; CHECK-NEXT: %62.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %62.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %62, %2, 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: undef %57.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 {
; CHECK-NEXT: internal %57.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2
; CHECK-NEXT: }
@@ -449,7 +449,7 @@ body: |
; CHECK-NEXT: %54.sub1:vreg_128 = COPY %43.sub1
; CHECK-NEXT: %54.sub3:vreg_128 = COPY %43.sub1
; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET %54, %2, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
- ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: undef %49.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 {
; CHECK-NEXT: internal %49.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2
; CHECK-NEXT: }
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
index ef85ea7efd58ff..8db54c83d34940 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
@@ -27,7 +27,7 @@ body: |
; CHECK-NEXT: renamable $sgpr1 = IMPLICIT_DEF
; CHECK-NEXT: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
- ; CHECK-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: $sgpr105 = S_AND_B32 killed renamable $sgpr1, renamable $sgpr1, implicit-def $scc
; CHECK-NEXT: S_NOP 0, implicit $sgpr104, implicit $sgpr105
%0:sreg_64 = COPY $sgpr0_sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir
index dd3abf60078540..b8692519570e1f 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir
@@ -18,7 +18,7 @@ body: |
; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr3
; CHECK-NEXT: SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sp_reg :: (store (s128) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1, implicit-def dead $sgpr2, implicit-def dead $sgpr3, implicit-def dead $sgpr4, implicit-def dead $sgpr5, implicit-def dead $sgpr6, implicit-def dead $sgpr7, implicit-def dead $sgpr8, implicit-def dead $sgpr9, implicit-def dead $sgpr10, implicit-def dead $sgpr11
- ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit renamable $sgpr0
; CHECK-NEXT: S_NOP 0, implicit renamable $sgpr3
; CHECK-NEXT: S_NOP 0, implicit renamable $sgpr0
@@ -92,7 +92,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit renamable $sgpr0
; CHECK-NEXT: S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1, implicit-def dead $sgpr2, implicit-def dead $sgpr3, implicit-def dead $sgpr4, implicit-def dead $sgpr5, implicit-def dead $sgpr6, implicit-def dead $sgpr7, implicit-def dead $sgpr8, implicit-def dead $sgpr9, implicit-def dead $sgpr10, implicit-def dead $sgpr11
- ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -101,7 +101,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3:0x00000000000000C3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def dead $sgpr0, implicit-def dead $sgpr1, implicit-def dead $sgpr2, implicit-def dead $sgpr3, implicit-def dead $sgpr4, implicit-def dead $sgpr5, implicit-def dead $sgpr6, implicit-def dead $sgpr7, implicit-def dead $sgpr8, implicit-def dead $sgpr9, implicit-def dead $sgpr10, implicit-def dead $sgpr11
- ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.0, 0, implicit $exec, implicit $sp_reg :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
index 1c55dc5a653721..2a2e3c86008de4 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
@@ -10,7 +10,7 @@
# CHECK-NEXT: stack-id: sgpr-spill,
# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
-# CHECK: renamable $sgpr5 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+# CHECK: renamable $sgpr5 = SI_SPILL_S32_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
name: no_merge_sgpr_vgpr_spill_slot
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index b80c478c3761fc..9fae3aee766577 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -76,7 +76,7 @@ body: |
; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7)
; CHECK-NEXT: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index f9853017b9d3fe..fa89df0ae7107a 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -68,7 +68,7 @@ body: |
successors:
liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
@@ -133,7 +133,7 @@ body: |
successors:
liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
- $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5)
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
index 1652f313347457..746774308786ad 100644
--- a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
+++ b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
@@ -19,7 +19,7 @@ body: |
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $vcc
; CHECK-NEXT: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $vcc
- ; CHECK-NEXT: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: $vcc = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
%0:vgpr_32 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
index 2fac3d29cb0dc0..c7f87359f8af55 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
+++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
@@ -25,7 +25,7 @@ body: |
; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
...
@@ -52,7 +52,7 @@ body: |
; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
...
@@ -81,7 +81,7 @@ body: |
; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
...
@@ -110,7 +110,7 @@ body: |
; GCN-NEXT: $vgpr52 = COPY $vgpr3, implicit $vgpr52_vgpr53_vgpr54_vgpr55
; GCN-NEXT: S_ENDPGM 0
SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $vgpr52_vgpr53_vgpr54_vgpr55 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $vgpr52_vgpr53_vgpr54_vgpr55 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
@@ -135,7 +135,7 @@ body: |
; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
...
@@ -162,7 +162,7 @@ body: |
; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
...
@@ -191,7 +191,7 @@ body: |
; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24
...
@@ -220,6 +220,6 @@ body: |
; GCN-NEXT: $agpr0 = COPY $agpr7, implicit $agpr0_agpr1_agpr2_agpr3
; GCN-NEXT: S_ENDPGM 0
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
- $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir
index 7c559f74f949a7..bbfa21cd73d079 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir
@@ -52,5 +52,5 @@ body: |
DBG_VALUE %stack.0, 0, !1, !8, debug-location !9
bb.1:
- renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
index 2058a94b0614a8..22b4f61bfebca0 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir
@@ -52,5 +52,5 @@ body: |
DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9
bb.1:
- renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
+ renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 93b98df2f7dba3..aa6fe80403b8b4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -282,7 +282,7 @@ body: |
bb.0:
liveins:
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
+ $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -375,7 +375,7 @@ body: |
bb.0:
liveins:
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -483,7 +483,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
+ $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -594,7 +594,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -707,7 +707,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.1, align 4, addrspace 5)
+ $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s96) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
index bdf9d88bbd8bb7..c1223cc8e99d9b 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
+++ b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
@@ -74,7 +74,7 @@ body: |
bb.7:
liveins: $sgpr60_sgpr61
$exec = S_OR_B64 $exec, killed $sgpr60_sgpr61, implicit-def $scc
- $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32
+ $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.0, 0, implicit $exec, implicit $sgpr32
S_BRANCH %bb.5
bb.8:
>From ca6845ae0509114f5e11e863dedf2716fa657766 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 14 Dec 2023 19:13:31 +0100
Subject: [PATCH 2/9] [AMDGPU] Add mark last scratch load pass
---
llvm/include/llvm/CodeGen/TargetPassConfig.h | 3 +
llvm/lib/CodeGen/TargetPassConfig.cpp | 2 +
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +
.../AMDGPU/AMDGPUMarkLastScratchLoad.cpp | 143 +++++++++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 +
.../CodeGen/AMDGPU/sgpr-regalloc-flags.ll | 4 +
.../AMDGPU/vgpr-mark-last-scratch-load.ll | 231 +++++++++++++
.../AMDGPU/vgpr-mark-last-scratch-load.mir | 303 ++++++++++++++++++
10 files changed, 700 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 66365419aa330b..67c50236832f21 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -423,6 +423,9 @@ class TargetPassConfig : public ImmutablePass {
/// to physical registers.
virtual void addPostRewrite() { }
+ /// Add passes to be run immediately before the Stack Slot Coloring pass.
+ virtual void addPreStackSlotColoring() {}
+
/// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4003a08a5422dd..d61d522cb42e1c 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1502,6 +1502,8 @@ void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&MachineSchedulerID);
if (addRegAssignAndRewriteOptimized()) {
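+ // Hook for targets to insert passes between register rewriting and the
+ // Stack Slot Coloring pass below.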
+ addPreStackSlotColoring();
+
// Perform stack slot coloring and post-ra machine LICM.
addPass(&StackSlotColoringID);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 35d33cb60bc47c..36af767a70b0a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -166,6 +166,9 @@ extern char &SILowerI1CopiesID;
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
extern char &AMDGPUGlobalISelDivergenceLoweringID;
+void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
+extern char &AMDGPUMarkLastScratchLoadID;
+
void initializeSILowerSGPRSpillsPass(PassRegistry &);
extern char &SILowerSGPRSpillsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
new file mode 100644
index 00000000000000..4c9d417760bd36
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -0,0 +1,143 @@
+//===-- AMDGPUMarkLastScratchLoad.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Mark scratch load/spill instructions that are guaranteed to be the last use
+// of their scratch slot, so the corresponding cache lines can be evicted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineOperand.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-mark-last-scratch-load"
+
+namespace {
+
+class AMDGPUMarkLastScratchLoad : public MachineFunctionPass {
+private:
+ LiveStacks *LS = nullptr;
+ SlotIndexes *SI = nullptr;
+ const SIInstrInfo *SII = nullptr;
+
+public:
+ static char ID;
+
+ AMDGPUMarkLastScratchLoad() : MachineFunctionPass(ID) {
+ initializeAMDGPUMarkLastScratchLoadPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
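+ // The pass only flips an immediate operand on existing instructions, so
+ // all analyses remain valid.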
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "AMDGPU Mark Last Scratch Load";
+ }
+};
+
+} // end anonymous namespace
+
+bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG({
+ dbgs() << "********** Mark Last Scratch Load **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
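+ // The last_use hint only exists on GFX12 and later.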
+ if (ST.getGeneration() < AMDGPUSubtarget::GFX12)
+ return false;
+
+ LS = &getAnalysis<LiveStacks>();
+ SI = &getAnalysis<SlotIndexes>();
+ SII = ST.getInstrInfo();
+
+ const unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0) {
+ LLVM_DEBUG(dbgs() << "No live slots, skipping\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << NumSlots << " intervals\n");
+
+ bool Changed = false;
+
+ for (auto &[SS, LI] : *LS) {
+ LLVM_DEBUG(dbgs() << "Checking interval: " << LI << "\n");
+
+ for (const LiveRange::Segment &Segment : LI.segments) {
+ LLVM_DEBUG(dbgs() << " Checking segment: " << Segment << "\n");
+
+ // Ignore segments that run to the end of a basic block, because in that
+ // case the slot is still live at the end of the block.
+ if (Segment.end.isBlock())
+ continue;
+
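+ // LiveStacks models each stack slot as a stack-slot "register"; decode
+ // the frame index back out of the interval's register.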
+ const int FrameIndex = Register::stackSlot2Index(LI.reg());
+ MachineInstr *LastLoad = nullptr;
+
+ MachineInstr *MISegmentStart = SI->getInstructionFromIndex(Segment.start);
+ MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end);
+ if (!MISegmentEnd) {
+ // FIXME: The start and end can refer to deleted instructions. We should
+ // be able to handle this more gracefully by finding the closest real
+ // instructions.
+ continue;
+ }
+ MachineBasicBlock *BB = MISegmentEnd->getParent();
+
+ // Iterate backwards from the segment end to the start of the basic block,
+ // or to the start of the segment if it lies in the same basic block.
+ auto End = BB->instr_rend();
+ if (MISegmentStart && MISegmentStart->getParent() == BB)
+ End = MISegmentStart->getReverseIterator();
+
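+ // Scanning backwards, the first load from this frame index we encounter
+ // is the last load from the slot within this segment.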
+ for (auto MI = MISegmentEnd->getReverseIterator(); MI != End; ++MI) {
+ int LoadFI = 0;
+
+ if (SII->isLoadFromStackSlot(*MI, LoadFI) && LoadFI == FrameIndex) {
+ LastLoad = &*MI;
+ break;
+ }
+ }
+
+ if (LastLoad) {
+ MachineOperand *LastUse =
+ SII->getNamedOperand(*LastLoad, AMDGPU::OpName::last_use);
+ assert(LastUse && "This instruction must have a last_use operand");
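+ // Flag the restore as the last use of the slot so later lowering can
+ // apply a last-use cache policy and the line can be evicted.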
+ LastUse->setImm(1);
+ Changed = true;
+ LLVM_DEBUG(dbgs() << " Found last load: " << *LastLoad);
+ }
+ }
+ }
+
+ return Changed;
+}
+
+char AMDGPUMarkLastScratchLoad::ID = 0;
+
+char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoad::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+ "AMDGPU Mark last scratch load", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+ "AMDGPU Mark last scratch load", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0f3bb3e7b0d8d0..c592f764c8295d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -382,6 +382,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILowerI1CopiesPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
+ initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@@ -962,6 +963,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
void addPreRegAlloc() override;
bool addPreRewrite() override;
+ void addPreStackSlotColoring() override;
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -1346,6 +1348,10 @@ bool GCNPassConfig::addPreRewrite() {
return true;
}
+void GCNPassConfig::addPreStackSlotColoring() {
+ addPass(&AMDGPUMarkLastScratchLoadID);
+}
+
FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
// Initialize the global default.
llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 2c92e7a0738855..9a974eaf50d235 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUIGroupLP.cpp
AMDGPUInsertSingleUseVDST.cpp
+ AMDGPUMarkLastScratchLoad.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPerfHintAnalysis.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 8b0b6263832243..48f00a82e3e1c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -359,6 +359,7 @@
; GCN-O1-NEXT: SI Lower WWM Copies
; GCN-O1-NEXT: GCN NSA Reassign
; GCN-O1-NEXT: Virtual Register Rewriter
+; GCN-O1-NEXT: AMDGPU Mark Last Scratch Load
; GCN-O1-NEXT: Stack Slot Coloring
; GCN-O1-NEXT: Machine Copy Propagation Pass
; GCN-O1-NEXT: Machine Loop Invariant Code Motion
@@ -655,6 +656,7 @@
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
+; GCN-O1-OPTS-NEXT: AMDGPU Mark Last Scratch Load
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
; GCN-O1-OPTS-NEXT: Machine Copy Propagation Pass
; GCN-O1-OPTS-NEXT: Machine Loop Invariant Code Motion
@@ -957,6 +959,7 @@
; GCN-O2-NEXT: SI Lower WWM Copies
; GCN-O2-NEXT: GCN NSA Reassign
; GCN-O2-NEXT: Virtual Register Rewriter
+; GCN-O2-NEXT: AMDGPU Mark Last Scratch Load
; GCN-O2-NEXT: Stack Slot Coloring
; GCN-O2-NEXT: Machine Copy Propagation Pass
; GCN-O2-NEXT: Machine Loop Invariant Code Motion
@@ -1271,6 +1274,7 @@
; GCN-O3-NEXT: SI Lower WWM Copies
; GCN-O3-NEXT: GCN NSA Reassign
; GCN-O3-NEXT: Virtual Register Rewriter
+; GCN-O3-NEXT: AMDGPU Mark Last Scratch Load
; GCN-O3-NEXT: Stack Slot Coloring
; GCN-O3-NEXT: Machine Copy Propagation Pass
; GCN-O3-NEXT: Machine Loop Invariant Code Motion
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 7c8507fe4559fb..17a19116735e4e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -25,6 +25,7 @@
; DEFAULT-NEXT: SI Lower WWM Copies
; DEFAULT-NEXT: GCN NSA Reassign
; DEFAULT-NEXT: Virtual Register Rewriter
+; DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
; DEFAULT-NEXT: Stack Slot Coloring
; O0: Fast Register Allocator
@@ -61,6 +62,7 @@
; BASIC-DEFAULT-NEXT: SI Lower WWM Copies
; BASIC-DEFAULT-NEXT: GCN NSA Reassign
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
+; BASIC-DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
@@ -75,6 +77,7 @@
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
+; DEFAULT-BASIC-NEXT: AMDGPU Mark Last Scratch Load
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
@@ -95,6 +98,7 @@
; BASIC-BASIC-NEXT: SI Lower WWM Copies
; BASIC-BASIC-NEXT: GCN NSA Reassign
; BASIC-BASIC-NEXT: Virtual Register Rewriter
+; BASIC-BASIC-NEXT: AMDGPU Mark Last Scratch Load
; BASIC-BASIC-NEXT: Stack Slot Coloring
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
new file mode 100644
index 00000000000000..57ced76e13a3cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -o - %s --stop-after=amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s
+
+define amdgpu_cs void @max_6_vgprs(ptr addrspace(1) %p) "amdgpu-num-vgpr"="6" {
+ ; CHECK-LABEL: name: max_6_vgprs
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr3 = V_ASHRREV_I32_e64 31, $vgpr2, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_pseudo_e64 2, killed $vgpr2_vgpr3, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1, dead renamable $vcc_lo = V_ADDC_U32_e64 killed $vgpr1, killed $vgpr3, killed $vcc_lo, 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.p2, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.p3, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.p4, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.p5, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr5, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %tid = load volatile i32, ptr addrspace(1) undef
+ %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
+ %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
+ %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
+ %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
+ %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
+ %v1 = load volatile i32, ptr addrspace(1) %p1
+ %v2 = load volatile i32, ptr addrspace(1) %p2
+ %v3 = load volatile i32, ptr addrspace(1) %p3
+ %v4 = load volatile i32, ptr addrspace(1) %p4
+ %v5 = load volatile i32, ptr addrspace(1) %p5
+ call void asm sideeffect "", "~{v[0:4]}" ()
+ store volatile i32 %v1, ptr addrspace(1) undef
+ store volatile i32 %v2, ptr addrspace(1) undef
+ store volatile i32 %v3, ptr addrspace(1) undef
+ store volatile i32 %v4, ptr addrspace(1) undef
+ store volatile i32 %v5, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @max_11_vgprs_branch(ptr addrspace(1) %p, i32 %tmp) "amdgpu-num-vgpr"="11" {
+ ; CHECK-LABEL: name: max_11_vgprs_branch
+ ; CHECK: bb.0..entry:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr4 = V_ASHRREV_I32_e64 31, $vgpr3, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr3_vgpr4 = V_LSHLREV_B64_pseudo_e64 2, killed $vgpr3_vgpr4, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, $vgpr3, 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr1, dead renamable $vcc_lo = V_ADDC_U32_e64 killed $vgpr1, killed $vgpr4, killed $vcc_lo, 0, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 336, 0, implicit $exec :: (volatile load (s32) from %ir.p7, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 448, 0, implicit $exec :: (volatile load (s32) from %ir.p8, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 576, 0, implicit $exec :: (volatile load (s32) from %ir.p9, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 720, 0, implicit $exec :: (volatile load (s32) from %ir.p10, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: renamable $vcc_lo = V_CMP_EQ_U32_e64 0, killed $vgpr2, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr0 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: renamable $sgpr1 = S_AND_B32 renamable $sgpr0, killed renamable $vcc_lo, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr0 = S_XOR_B32 renamable $sgpr1, killed renamable $sgpr0, implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term killed renamable $sgpr1
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.Flow:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, renamable $sgpr0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2..true:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr10 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr1, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr2, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr3, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.12, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 240, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr4, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.13, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr10, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.13, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3..false:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $vgpr10 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr5, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr6, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr7, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr8, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 240, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr9, addrspace 1)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr10, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL killed renamable $vgpr0
+ ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL killed renamable $vgpr0
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4..exit:
+ ; CHECK-NEXT: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr0, implicit-def $scc
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+.entry:
+ %tid = load volatile i32, ptr addrspace(1) undef
+ %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
+ %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
+ %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
+ %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
+ %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
+ %p6 = getelementptr inbounds i32, ptr addrspace(1) %p5, i32 20
+ %p7 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 24
+ %p8 = getelementptr inbounds i32, ptr addrspace(1) %p7, i32 28
+ %p9 = getelementptr inbounds i32, ptr addrspace(1) %p8, i32 32
+ %p10 = getelementptr inbounds i32, ptr addrspace(1) %p9, i32 36
+ %v7 = load volatile i32, ptr addrspace(1) %p7
+ %v8 = load volatile i32, ptr addrspace(1) %p8
+ %v9 = load volatile i32, ptr addrspace(1) %p9
+ %v10 = load volatile i32, ptr addrspace(1) %p10
+ %cmp = icmp ne i32 %tmp, 0
+ br i1 %cmp, label %.true, label %.false
+
+.true:
+ %v1_t = load volatile i32, ptr addrspace(1) %p1
+ %v2_t = load volatile i32, ptr addrspace(1) %p2
+ %v3_t = load volatile i32, ptr addrspace(1) %p3
+ %v4_t = load volatile i32, ptr addrspace(1) %p4
+ %v5_t = load volatile i32, ptr addrspace(1) %p5
+ %v6_t = load volatile i32, ptr addrspace(1) %p6
+ call void asm sideeffect "", "~{v[0:9]}" ()
+ store volatile i32 %v1_t, ptr addrspace(1) undef
+ store volatile i32 %v2_t, ptr addrspace(1) undef
+ store volatile i32 %v3_t, ptr addrspace(1) undef
+ store volatile i32 %v4_t, ptr addrspace(1) undef
+ store volatile i32 %v5_t, ptr addrspace(1) undef
+ store volatile i32 %v6_t, ptr addrspace(1) undef
+ store volatile i32 %v7, ptr addrspace(1) undef
+ store volatile i32 %v8, ptr addrspace(1) undef
+
+ br label %.exit
+
+.false:
+ %v1_f = load volatile i32, ptr addrspace(1) %p1
+ %v2_f = load volatile i32, ptr addrspace(1) %p2
+ %v3_f = load volatile i32, ptr addrspace(1) %p3
+ %v4_f = load volatile i32, ptr addrspace(1) %p4
+ %v5_f = load volatile i32, ptr addrspace(1) %p5
+ %v6_f = load volatile i32, ptr addrspace(1) %p6
+ call void asm sideeffect "", "~{v[0:9]}" ()
+ store volatile i32 %v1_f, ptr addrspace(1) undef
+ store volatile i32 %v2_f, ptr addrspace(1) undef
+ store volatile i32 %v3_f, ptr addrspace(1) undef
+ store volatile i32 %v4_f, ptr addrspace(1) undef
+ store volatile i32 %v5_f, ptr addrspace(1) undef
+ store volatile i32 %v6_f, ptr addrspace(1) undef
+ store volatile i32 %v7, ptr addrspace(1) undef
+ store volatile i32 %v8, ptr addrspace(1) undef
+
+ br label %.exit
+
+.exit:
+ store volatile i32 %v9, ptr addrspace(1) undef
+ store volatile i32 %v10, ptr addrspace(1) undef
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
new file mode 100644
index 00000000000000..45600f5d21c296
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
@@ -0,0 +1,303 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s
+
+--- |
+ define amdgpu_cs void @test_spill_12x32() "amdgpu-num-vgpr"="12" {
+ ret void
+ }
+ define amdgpu_cs void @test_spill_384() "amdgpu-num-vgpr"="12" {
+ ret void
+ }
+ define amdgpu_ps void @test_loop_12() "amdgpu-num-vgpr"="12" {
+ ret void
+ }
+...
+---
+name: test_spill_12x32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+
+ ; CHECK-LABEL: name: test_spill_12x32
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr1, %stack.1, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr2, %stack.2, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr3, %stack.3, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr4, %stack.4, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr5, %stack.5, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr7, %stack.7, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr8, %stack.8, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr9, %stack.9, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr10, %stack.10, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr11, %stack.11, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V32_RESTORE]], implicit [[SI_SPILL_V32_RESTORE1]], implicit [[SI_SPILL_V32_RESTORE2]], implicit [[SI_SPILL_V32_RESTORE3]], implicit [[SI_SPILL_V32_RESTORE4]], implicit [[SI_SPILL_V32_RESTORE5]], implicit [[SI_SPILL_V32_RESTORE6]], implicit [[SI_SPILL_V32_RESTORE7]], implicit [[SI_SPILL_V32_RESTORE8]], implicit [[SI_SPILL_V32_RESTORE9]], implicit [[SI_SPILL_V32_RESTORE10]], implicit [[SI_SPILL_V32_RESTORE11]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = COPY $vgpr1
+ %2:vgpr_32 = COPY $vgpr2
+ %3:vgpr_32 = COPY $vgpr3
+ %4:vgpr_32 = COPY $vgpr4
+ %5:vgpr_32 = COPY $vgpr5
+ %6:vgpr_32 = COPY $vgpr6
+ %7:vgpr_32 = COPY $vgpr7
+ %8:vgpr_32 = COPY $vgpr8
+ %9:vgpr_32 = COPY $vgpr9
+ %10:vgpr_32 = COPY $vgpr10
+ %11:vgpr_32 = COPY $vgpr11
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11
+...
+
+---
+name: test_spill_384
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+
+ ; CHECK-LABEL: name: test_spill_384
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_SPILL_V384_SAVE $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, %stack.0, $sp_reg, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK-NEXT: [[SI_SPILL_V384_RESTORE:%[0-9]+]]:vreg_384 = SI_SPILL_V384_RESTORE %stack.0, $sp_reg, 0, 1, implicit $exec :: (load (s384) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V384_RESTORE]]
+ %0:vreg_384 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_loop_12
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test_loop_12
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr12, %stack.0, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr11, %stack.1, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr10, %stack.2, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr9, %stack.3, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr8, %stack.4, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr7, %stack.5, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr5, %stack.7, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr4, %stack.8, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr3, %stack.9, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr2, %stack.10, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr1, %stack.11, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.12, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE [[V_MOV_B32_e32_3]], %stack.16, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; CHECK-NEXT: %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; CHECK-NEXT: %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, [[SI_SPILL_V32_RESTORE]], implicit $exec
+ ; CHECK-NEXT: %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term %sand
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %mask2:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: %count:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE [[V_MOV_B32_e32_1]], %stack.14, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_]], 0, [[SI_SPILL_V32_RESTORE1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.14, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE [[V_MOV_B32_e32_]], %stack.13, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_1]], 0, [[SI_SPILL_V32_RESTORE2]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_2]], 0, [[SI_SPILL_V32_RESTORE3]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE [[V_MOV_B32_e32_2]], %stack.15, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
+ ; CHECK-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_4]], 0, [[SI_SPILL_V32_RESTORE4]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+ ; CHECK-NEXT: %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, [[SI_SPILL_V32_RESTORE5]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; CHECK-NEXT: %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, [[SI_SPILL_V32_RESTORE6]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+ ; CHECK-NEXT: %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, [[SI_SPILL_V32_RESTORE7]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; CHECK-NEXT: %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, [[SI_SPILL_V32_RESTORE8]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; CHECK-NEXT: %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, [[SI_SPILL_V32_RESTORE9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, [[SI_SPILL_V32_RESTORE10]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, [[SI_SPILL_V32_RESTORE11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE12:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, [[SI_SPILL_V32_RESTORE12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.13, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
+ ; CHECK-NEXT: %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE13:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
+ ; CHECK-NEXT: %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, [[SI_SPILL_V32_RESTORE13]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.15, $sp_reg, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
+ ; CHECK-NEXT: %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
+ ; CHECK-NEXT: $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
+ ; CHECK-NEXT: SI_SPILL_V32_SAVE [[V_MOV_B32_e32_4]], %stack.16, $sp_reg, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE14:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.16, $sp_reg, 0, 1, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
+ ; CHECK-NEXT: EXP_DONE 0, [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], [[SI_SPILL_V32_RESTORE14]], -1, 0, 15, implicit $exec
+ ; CHECK-NEXT: EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
+ ; CHECK-NEXT: EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0: ; entry
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
+
+ %12:vgpr_32 = COPY $vgpr12
+ %11:vgpr_32 = COPY $vgpr11
+ %10:vgpr_32 = COPY $vgpr10
+ %9:vgpr_32 = COPY $vgpr9
+ %8:vgpr_32 = COPY $vgpr8
+ %7:vgpr_32 = COPY $vgpr7
+ %6:vgpr_32 = COPY $vgpr6
+ %5:vgpr_32 = COPY $vgpr5
+ %4:vgpr_32 = COPY $vgpr4
+ %3:vgpr_32 = COPY $vgpr3
+ %2:vgpr_32 = COPY $vgpr2
+ %1:vgpr_32 = COPY $vgpr1
+ %loop_end:vgpr_32 = COPY $vgpr0
+ %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, %loop_end, implicit $exec
+ %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
+ $exec_lo = S_MOV_B32_term %sand
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1: ; loop preheader
+ successors: %bb.3(0x80000000)
+
+ %mask2:sgpr_32 = S_MOV_B32 0
+ %count:sgpr_32 = S_MOV_B32 0
+ %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2: ; flow
+ successors: %bb.5(0x80000000)
+
+ $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
+ S_BRANCH %bb.5
+
+ bb.3: ; loop
+ successors: %bb.4(0x04000000), %bb.3(0x7c000000)
+
+ %res1:vgpr_32 = V_ADD_F32_e64 0, %res1, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %res2:vgpr_32 = V_ADD_F32_e64 0, %res2, 0, %2, 0, 0, implicit $mode, implicit $exec
+ %res3:vgpr_32 = V_ADD_F32_e64 0, %res3, 0, %3, 0, 0, implicit $mode, implicit $exec
+ %res4:vgpr_32 = V_ADD_F32_e64 0, %res4, 0, %4, 0, 0, implicit $mode, implicit $exec
+ %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, %5, 0, 0, implicit $mode, implicit $exec
+ %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, %6, 0, 0, implicit $mode, implicit $exec
+ %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, %7, 0, 0, implicit $mode, implicit $exec
+ %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, %8, 0, 0, implicit $mode, implicit $exec
+ %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, %9, 0, 0, implicit $mode, implicit $exec
+ %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, %10, 0, 0, implicit $mode, implicit $exec
+ %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, %11, 0, 0, implicit $mode, implicit $exec
+ %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, %12, 0, 0, implicit $mode, implicit $exec
+ %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
+ %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, %loop_end, implicit $exec
+ %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
+ $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4: ; flow
+ successors: %bb.2(0x80000000)
+
+ $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
+ S_BRANCH %bb.2
+
+ bb.5: ; exit
+ EXP_DONE 0, %res1, %res2, %res3, %res4, -1, 0, 15, implicit $exec
+ EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
+ EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
+ S_ENDPGM 0
+
+...
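A note on reading the checks above: the extra immediate on SI_SPILL_V32_RESTORE
just before `implicit $exec` is the last_use flag added in patch 1. The pass
sets it to 1 where the reload is provably the last read of a slot before it
dies or is stored again (%stack.14 at the top of bb.3, %stack.16 in bb.5) and
leaves it at 0 otherwise (most reloads inside the bb.3 loop of test_loop_12).
Downstream code can query the flag with the usual named-operand lookup; a
minimal sketch, matching what patch 5 below does in eliminateFrameIndex:

  int16_t LastUseIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::last_use);
  bool IsLastUse = LastUseIdx != -1 && MI.getOperand(LastUseIdx).getImm() == 1;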
>From fdec15d8d30f84c980df336ea597401505f96f37 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:00:07 +0100
Subject: [PATCH 3/9] move pass to addRegAssignAndRewriteOptimized
---
llvm/include/llvm/CodeGen/TargetPassConfig.h | 3 ---
llvm/lib/CodeGen/TargetPassConfig.cpp | 2 --
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++-----
3 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 67c50236832f21..66365419aa330b 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -423,9 +423,6 @@ class TargetPassConfig : public ImmutablePass {
/// to physical registers.
virtual void addPostRewrite() { }
- /// Add passes to be run immediately before Stack Slot Coloring pass.
- virtual void addPreStackSlotColoring() {}
-
/// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index d61d522cb42e1c..4003a08a5422dd 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1502,8 +1502,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&MachineSchedulerID);
if (addRegAssignAndRewriteOptimized()) {
- addPreStackSlotColoring();
-
// Perform stack slot coloring and post-ra machine LICM.
addPass(&StackSlotColoringID);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c592f764c8295d..b8a7a5e2080213 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -963,7 +963,6 @@ class GCNPassConfig final : public AMDGPUPassConfig {
void addPreRegAlloc() override;
bool addPreRewrite() override;
- void addPreStackSlotColoring() override;
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -1348,10 +1347,6 @@ bool GCNPassConfig::addPreRewrite() {
return true;
}
-void GCNPassConfig::addPreStackSlotColoring() {
- addPass(&AMDGPUMarkLastScratchLoadID);
-}
-
FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
// Initialize the global default.
llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
@@ -1430,6 +1425,8 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
addPreRewrite();
addPass(&VirtRegRewriterID);
+ addPass(&AMDGPUMarkLastScratchLoadID);
+
return true;
}
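Net effect of patch 3: the generic addPreStackSlotColoring hook goes away and
the pass is instead appended right after VirtRegRewriter inside the
AMDGPU-specific addRegAssignAndRewriteOptimized. Ordering is preserved, since
the generic addOptimizedRegAlloc still adds StackSlotColoring immediately
after this function returns true, so the pass keeps running between rewriting
and stack slot coloring without needing a new target hook. A sketch of the
resulting pipeline tail, assuming the surrounding code is otherwise unchanged:

  // after greedy VGPR allocation:
  addPreRewrite();
  addPass(&VirtRegRewriterID);
  addPass(&AMDGPUMarkLastScratchLoadID); // moved here from the removed hook
  // ...then TargetPassConfig::addOptimizedRegAlloc adds:
  addPass(&StackSlotColoringID);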
>From 99e37d7be19b80d0915b9f7f90cea0536bd23516 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:06:37 +0100
Subject: [PATCH 4/9] change expected output in .ll test
---
.../AMDGPU/vgpr-mark-last-scratch-load.ll | 342 ++++++++++--------
1 file changed, 196 insertions(+), 146 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
index 57ced76e13a3cd..b0e7e7a510eebf 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
@@ -1,36 +1,54 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -o - %s --stop-after=amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -o - %s | FileCheck -check-prefix=CHECK %s
define amdgpu_cs void @max_6_vgprs(ptr addrspace(1) %p) "amdgpu-num-vgpr"="6" {
- ; CHECK-LABEL: name: max_6_vgprs
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr3 = V_ASHRREV_I32_e64 31, $vgpr2, implicit $exec
- ; CHECK-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_pseudo_e64 2, killed $vgpr2_vgpr3, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, $vgpr2, 0, implicit $exec
- ; CHECK-NEXT: renamable $vgpr1, dead renamable $vcc_lo = V_ADDC_U32_e64 killed $vgpr1, killed $vgpr3, killed $vcc_lo, 0, implicit $exec
- ; CHECK-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.p2, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.p3, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.p4, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.p5, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr5, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: S_ENDPGM 0
+; CHECK-LABEL: max_6_vgprs:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
+; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; CHECK-NEXT: global_load_b32 v5, v[0:1], off th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:160 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v0, off offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: global_store_b32 v[0:1], v5, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; CHECK-NEXT: s_endpgm
%tid = load volatile i32, ptr addrspace(1) undef
%p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
%p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
@@ -52,121 +70,153 @@ define amdgpu_cs void @max_6_vgprs(ptr addrspace(1) %p) "amdgpu-num-vgpr"="6" {
}
define amdgpu_cs void @max_11_vgprs_branch(ptr addrspace(1) %p, i32 %tmp) "amdgpu-num-vgpr"="11" {
- ; CHECK-LABEL: name: max_11_vgprs_branch
- ; CHECK: bb.0..entry:
- ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr4 = V_ASHRREV_I32_e64 31, $vgpr3, implicit $exec
- ; CHECK-NEXT: renamable $vgpr3_vgpr4 = V_LSHLREV_B64_pseudo_e64 2, killed $vgpr3_vgpr4, implicit $exec
- ; CHECK-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, $vgpr3, 0, implicit $exec
- ; CHECK-NEXT: renamable $vgpr1, dead renamable $vcc_lo = V_ADDC_U32_e64 killed $vgpr1, killed $vgpr4, killed $vcc_lo, 0, implicit $exec
- ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 336, 0, implicit $exec :: (volatile load (s32) from %ir.p7, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 448, 0, implicit $exec :: (volatile load (s32) from %ir.p8, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 576, 0, implicit $exec :: (volatile load (s32) from %ir.p9, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 720, 0, implicit $exec :: (volatile load (s32) from %ir.p10, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; CHECK-NEXT: renamable $vcc_lo = V_CMP_EQ_U32_e64 0, killed $vgpr2, implicit $exec
- ; CHECK-NEXT: renamable $sgpr0 = COPY $exec_lo, implicit-def $exec_lo
- ; CHECK-NEXT: renamable $sgpr1 = S_AND_B32 renamable $sgpr0, killed renamable $vcc_lo, implicit-def dead $scc
- ; CHECK-NEXT: renamable $sgpr0 = S_XOR_B32 renamable $sgpr1, killed renamable $sgpr0, implicit-def dead $scc
- ; CHECK-NEXT: $exec_lo = S_MOV_B32_term killed renamable $sgpr1
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1.Flow:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr0 = S_OR_SAVEEXEC_B32 killed renamable $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, renamable $sgpr0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2..true:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr10 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr1, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr2, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr3, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.12, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 240, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr4, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.13, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr10, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.13, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: S_BRANCH %bb.4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3..false:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr0, $vgpr0_vgpr1:0x000000000000000F
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $vgpr10 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from %ir.p1, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 16, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr5, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 48, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr6, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 96, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr7, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 160, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr8, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
- ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 240, 0, implicit $exec :: (volatile load (s32) from %ir.sunkaddr9, addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr10, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: KILL killed renamable $vgpr0
- ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: KILL killed renamable $vgpr0
- ; CHECK-NEXT: renamable $vgpr0_vgpr1 = IMPLICIT_DEF
- ; CHECK-NEXT: S_BRANCH %bb.1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4..exit:
- ; CHECK-NEXT: liveins: $sgpr0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr0, implicit-def $scc
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, 1, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: S_ENDPGM 0
+; CHECK-LABEL: max_11_vgprs_branch:
+; CHECK: ; %bb.0: ; %.entry
+; CHECK-NEXT: global_load_b32 v3, v[0:1], off th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_mov_b32 s0, exec_lo
+; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; CHECK-NEXT: v_lshlrev_b64_e32 v[3:4], 2, v[3:4]
+; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
+; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:336 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v3, off offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:448 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v3, off offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:576 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v3, off offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:720 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v3, off offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: v_cmpx_eq_u32_e32 0, v2
+; CHECK-NEXT: s_xor_b32 s0, exec_lo, s0
+; CHECK-NEXT: s_cbranch_execz .LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %.false
+; CHECK-NEXT: global_load_b32 v10, v[0:1], off th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:160 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:240 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v0, off offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: global_store_b32 v[0:1], v10, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ; implicit-def: $vgpr0
+; CHECK-NEXT: ; kill: killed $vgpr0
+; CHECK-NEXT: ; implicit-def: $vgpr0
+; CHECK-NEXT: ; kill: killed $vgpr0
+; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT: .LBB1_2: ; %Flow
+; CHECK-NEXT: s_and_not1_saveexec_b32 s0, s0
+; CHECK-NEXT: s_cbranch_execz .LBB1_4
+; CHECK-NEXT: ; %bb.3: ; %.true
+; CHECK-NEXT: global_load_b32 v10, v[0:1], off th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:160 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v2, off offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:240 th:TH_LOAD_RT_NT
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: scratch_store_b32 off, v0, off offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: global_store_b32 v[0:1], v10, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: .LBB1_4: ; %.exit
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; CHECK-NEXT: s_endpgm
.entry:
%tid = load volatile i32, ptr addrspace(1) undef
%p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
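(Note that the reloads in the checks above are still plain scratch_load_b32
with no th: modifier: after patch 4 the last_use flag exists on the restore
pseudos but is not yet encoded in the emitted instruction's cache policy.
Patch 5 below threads the flag through buildSpillLoadStore so that on gfx12 a
marked reload gets the last-use temporal hint, AMDGPU::CPol::TH_LU, which
should then show up as a th: modifier on the reloads in the updated test diff
that follows.)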
>From 7db61b56a4c27346c2124573ad51ddc480b1783e Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:03:06 +0100
Subject: [PATCH 5/9] set last use in cache policy
---
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 28 ++++++++-----
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 4 +-
.../AMDGPU/vgpr-mark-last-scratch-load.ll | 40 +++++++++----------
4 files changed, 42 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0f89df14448667..9839b8158019dd 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -148,7 +148,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
LiveUnits.addReg(SpillReg);
bool IsKill = !MBB.isLiveIn(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
- DwordOff, MMO, nullptr, &LiveUnits);
+ DwordOff, false, MMO, nullptr, &LiveUnits);
if (IsKill)
LiveUnits.removeReg(SpillReg);
}
@@ -170,7 +170,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
- DwordOff, MMO, nullptr, &LiveUnits);
+ DwordOff, false, MMO, nullptr, &LiveUnits);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a93cf5cad411f3..774dbf761e7d25 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1320,8 +1320,8 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
- MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
- RegScavenger *RS, LiveRegUnits *LiveUnits) const {
+ MCRegister ScratchOffsetReg, int64_t InstOffset, bool LastUse,
+ MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits) const {
assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
MachineFunction *MF = MBB.getParent();
@@ -1657,8 +1657,10 @@ void SIRegisterInfo::buildSpillLoadStore(
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
+
+ int64_t CPol = AMDGPU::isGFX12Plus(ST) && LastUse ? AMDGPU::CPol::TH_LU : 0;
MIB.addImm(Offset + RegOffset)
- .addImm(0); // cpol
+ .addImm(CPol);
if (!IsFlat)
MIB.addImm(0); // swz
MIB.addMemOperand(NewMMO);
@@ -1734,12 +1736,12 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
- FrameReg, Offset * SB.EltSize, MMO, SB.RS);
+ FrameReg, Offset * SB.EltSize, false, MMO, SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
- FrameReg, Offset * SB.EltSize, MMO, SB.RS);
+ FrameReg, Offset * SB.EltSize, false, MMO, SB.RS);
// This only ever adds one VGPR spill
SB.MFI.addToSpilledVGPRs(1);
}
@@ -2175,7 +2177,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
buildSpillLoadStore(
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), false,
*MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
if (IsWWMRegSpill)
@@ -2241,14 +2243,20 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
RS->isRegUsed(AMDGPU::SCC));
}
+ int16_t LastUseIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::last_use);
+ bool LastUse = (LastUseIdx != -1)
+ ? (MI->getOperand(LastUseIdx).getImm() == 1)
+ : false;
+
buildSpillLoadStore(
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), LastUse,
*MI->memoperands_begin(), RS);
-
- if (IsWWMRegSpill)
+
+ if (IsWWMRegSpill)
TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
-
+
MI->eraseFromParent();
return true;
}
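To summarize the flow these hunks set up: eliminateFrameIndex reads the last_use immediate off the restore pseudo (via AMDGPU::OpName::last_use) and threads it into buildSpillLoadStore, which lowers it to the TH_LU ("last use") cache-policy hint. A minimal sketch of that final mapping, using only names from the hunks above (the helper function itself is hypothetical, for illustration):

  // Hypothetical helper (illustration only): picks the cache-policy
  // immediate for a reload. TH_LU tells GFX12+ caches the line is dead
  // after this read; earlier targets have no such hint and keep 0.
  static int64_t selectReloadCPol(const GCNSubtarget &ST, bool LastUse) {
    return AMDGPU::isGFX12Plus(ST) && LastUse ? AMDGPU::CPol::TH_LU : 0;
  }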
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 88d5686720985e..d39f454492cce0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -427,8 +427,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg,
bool ValueIsKill, MCRegister ScratchOffsetReg,
- int64_t InstrOffset, MachineMemOperand *MMO,
- RegScavenger *RS,
+ int64_t InstrOffset, bool LastUse,
+ MachineMemOperand *MMO, RegScavenger *RS,
LiveRegUnits *LiveUnits = nullptr) const;
// Return alignment in register file of first register in a register tuple.
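For readers skimming the header change, a hypothetical call with the updated parameter order (FI, ValueReg, FrameReg, MMO and LiveUnits are placeholders; the real call sites are updated in the .cpp hunks above):

  // Reload ValueReg from frame index FI, marking the reload as the last
  // use of the slot so buildSpillLoadStore emits th:TH_LOAD_LU on GFX12+.
  TRI.buildSpillLoadStore(MBB, MI, DL, AMDGPU::SCRATCH_LOAD_DWORD_SADDR,
                          FI, ValueReg, /*ValueIsKill=*/false, FrameReg,
                          /*InstrOffset=*/0, /*LastUse=*/true, MMO,
                          /*RS=*/nullptr, &LiveUnits);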
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
index b0e7e7a510eebf..ab112e606c0a82 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll
@@ -30,19 +30,19 @@ define amdgpu_cs void @max_6_vgprs(ptr addrspace(1) %p) "amdgpu-num-vgpr"="6" {
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: global_store_b32 v[0:1], v5, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
@@ -118,31 +118,31 @@ define amdgpu_cs void @max_11_vgprs_branch(ptr addrspace(1) %p, i32 %tmp) "amdgp
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: global_store_b32 v[0:1], v10, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
@@ -176,41 +176,41 @@ define amdgpu_cs void @max_11_vgprs_branch(ptr addrspace(1) %p, i32 %tmp) "amdgp
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: global_store_b32 v[0:1], v10, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:36 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
; CHECK-NEXT: .LBB1_4: ; %.exit
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_b32 v[0:1], v0, off th:TH_STORE_NT_RT
; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
From c8c985cf5d819ffe41c1254f49a6602a08d082b1 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:10:58 +0100
Subject: [PATCH 6/9] Remove some LLVM_DEBUG lines
---
llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
index 4c9d417760bd36..cef2b8375c2f4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -79,10 +79,7 @@ bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &[SS, LI] : *LS) {
- LLVM_DEBUG(dbgs() << "Checking interval: " << LI << "\n");
-
for (const LiveRange::Segment &Segment : LI.segments) {
- LLVM_DEBUG(dbgs() << " Checking segment: " << Segment << "\n");
// Ignore segments that run to the end of the basic block because in that
// case the slot is still live at the end of it.
From b5a4183a2bfbb62023b647daf40cfb81d197e5cb Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:24:06 +0100
Subject: [PATCH 7/9] Move MISegmentStart lookup after the null check
---
llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
index cef2b8375c2f4a..b2770e8df3450f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -89,7 +89,6 @@ bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
const int FrameIndex = Register::stackSlot2Index(LI.reg());
MachineInstr *LastLoad = nullptr;
- MachineInstr *MISegmentStart = SI->getInstructionFromIndex(Segment.start);
MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end);
if (!MISegmentEnd) {
// FIXME: The start and end can refer to deleted instructions. We should
@@ -97,6 +96,7 @@ bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
// instructions.
continue;
}
+ MachineInstr *MISegmentStart = SI->getInstructionFromIndex(Segment.start);
MachineBasicBlock *BB = MISegmentEnd->getParent();
// Start iteration backwards from segment end until the start of basic
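(A note on this reorder, as far as I can tell: it just sinks the Segment.start lookup below the early continue, so the SlotIndexes query is skipped for segments whose end refers to a deleted instruction; the computed value was unused on that path.)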
From af15018519176de193929998f407ebc1d7e62a3e Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 13:26:50 +0100
Subject: [PATCH 8/9] clang-format
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 774dbf761e7d25..af7369b35b2584 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1659,8 +1659,7 @@ void SIRegisterInfo::buildSpillLoadStore(
}
int64_t CPol = AMDGPU::isGFX12Plus(ST) && LastUse ? AMDGPU::CPol::TH_LU : 0;
- MIB.addImm(Offset + RegOffset)
- .addImm(CPol);
+ MIB.addImm(Offset + RegOffset).addImm(CPol);
if (!IsFlat)
MIB.addImm(0); // swz
MIB.addMemOperand(NewMMO);
From d06bcd130a783361bf6dd769cb10760f4e796835 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Thu, 4 Jan 2024 15:27:28 +0100
Subject: [PATCH 9/9] address comments
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index af7369b35b2584..24be0427a72039 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1658,7 +1658,9 @@ void SIRegisterInfo::buildSpillLoadStore(
MIB.addReg(SOffset, SOffsetRegState);
}
- int64_t CPol = AMDGPU::isGFX12Plus(ST) && LastUse ? AMDGPU::CPol::TH_LU : 0;
+ assert((!LastUse || AMDGPU::isGFX12Plus(ST)) &&
+ "last_use operand exists only on GFX12+");
+ int64_t CPol = LastUse ? AMDGPU::CPol::TH_LU : 0;
MIB.addImm(Offset + RegOffset).addImm(CPol);
if (!IsFlat)
MIB.addImm(0); // swz
@@ -2244,9 +2246,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
int16_t LastUseIdx =
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::last_use);
- bool LastUse = (LastUseIdx != -1)
- ? (MI->getOperand(LastUseIdx).getImm() == 1)
- : false;
+ bool LastUse =
+ LastUseIdx != -1 && MI->getOperand(LastUseIdx).getImm() == 1;
buildSpillLoadStore(
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
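(A note on the final shape of patch 9: the isGFX12Plus check moves out of the cpol selection and into an assert. That holds because LastUse can only become true via the last_use operand, which per the assert message exists only on GFX12+; every other caller updated in patch 5 passes false explicitly.)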