[llvm-branch-commits] [llvm] f333736 - AMDGPU: Remove SGPRSpillVGPRDefinedSet hack
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 16 18:51:59 PST 2020
Author: Matt Arsenault
Date: 2020-12-16T21:33:35-05:00
New Revision: f333736757e9df318b2c3490c61341966024561b
URL: https://github.com/llvm/llvm-project/commit/f333736757e9df318b2c3490c61341966024561b
DIFF: https://github.com/llvm/llvm-project/commit/f333736757e9df318b2c3490c61341966024561b.diff
LOG: AMDGPU: Remove SGPRSpillVGPRDefinedSet hack
These VGPRs should be reserved and therefore do not need "correct"
liveness. They should still not have undef uses, since undef uses can
still cause issues.
Added:
Modified:
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
llvm/test/CodeGen/AMDGPU/spill192.mir
llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 91da3bcc3484..130edd83bef6 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -310,10 +310,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
std::unique_ptr<RegScavenger> RS;
+ bool NewReservedRegs = false;
+
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
- if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
- SpillVGPRToAGPR) {
+ const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
+ (HasCSRs || FuncInfo->hasSpilledSGPRs());
+ if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) {
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
// are spilled to VGPRs, in which case we can eliminate the stack usage.
//
@@ -338,6 +341,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
+ NewReservedRegs = true;
if (!RS)
RS.reset(new RegScavenger());
@@ -354,6 +358,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ NewReservedRegs = true;
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
@@ -382,5 +387,9 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
SaveBlocks.clear();
RestoreBlocks.clear();
+ // Update the reserved registers with any VGPRs added for SGPR spills.
+ if (NewReservedRegs)
+ MRI.freezeReservedRegs(MF);
+
return MadeChange;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 85a64a0d388d..ab203c44e022 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1150,7 +1150,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- DenseSet<Register> SGPRSpillVGPRDefinedSet; // FIXME: This should be removed
ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
= MFI->getSGPRToVGPRSpills(Index);
@@ -1186,20 +1185,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool UseKill = IsKill && i == NumSubRegs - 1;
- // During SGPR spilling to VGPR, determine if the VGPR is defined. The
- // only circumstance in which we say it is undefined is when it is the
- // first spill to this VGPR in the first basic block.
- bool VGPRDefined = true;
- if (MBB == &MF->front())
- VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
-
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
- .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
+ .addReg(Spill.VGPR);
if (i == 0 && NumSubRegs > 1) {
// We may be spilling a super-register which is only partially defined,
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
index 332f7c370c25..eab611032652 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
@@ -14,10 +14,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec_lo
; CHECK: $sgpr0 = S_MOV_B32 $exec_lo
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_lo
@@ -38,10 +38,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec_hi
; CHECK: $sgpr0 = S_MOV_B32 $exec_hi
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_hi
@@ -62,13 +62,13 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $exec
; CHECK: $sgpr0_sgpr1 = S_MOV_B64 $exec
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
- ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+ ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec
@@ -91,10 +91,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
@@ -113,10 +113,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
@@ -135,13 +135,13 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
- ; CHECK: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+ ; CHECK: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
index d0a129d56f2e..848e317d28c2 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
@@ -15,10 +15,10 @@ body: |
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def $m0
; CHECK: $sgpr0 = S_MOV_B32 $m0
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $m0 = S_MOV_B32 killed $sgpr0
; CHECK: S_NOP 0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
@@ -43,10 +43,10 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK: S_WAITCNT 0
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
- ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, undef $vgpr0
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK: $m0 = S_MOV_B32 killed $sgpr0
; CHECK: S_NOP 0
; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
index 3486f57d0261..f2da7c1001e6 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
@@ -21,7 +21,7 @@ body: |
; CHECK-LABEL: name: sgpr_spill_s64_undef_high32
; CHECK: liveins: $sgpr4, $vgpr0
- ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
@@ -45,7 +45,7 @@ body: |
; CHECK-LABEL: name: sgpr_spill_s64_undef_low32
; CHECK: liveins: $sgpr5, $vgpr0
- ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index 3abe33f43e15..13df02ec8bdb 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -22,7 +22,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_subreg
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
- ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -52,7 +52,7 @@ body: |
; GCN-LABEL: name: spill_sgpr128_use_kill
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN: renamable $sgpr1 = COPY $sgpr2
- ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index cca48437c481..3a959401aabf 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -30,7 +30,7 @@ body: |
; EXPANDED: successors: %bb.1(0x80000000)
; EXPANDED: liveins: $vgpr0
; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
- ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
index 375a46cf3560..e39a04d090b5 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -124,3 +124,25 @@ body: |
; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5)
...
+
+---
+name: spill_v128_kill
+tracksRegLiveness: true
+stack:
+ - { id: 0, type: spill-slot, size: 16, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; CHECK-LABEL: name: spill_v128_kill
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
+ ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
+ SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, addrspace 5)
+...
More information about the llvm-branch-commits mailing list