[llvm] [AMDGPU] Add writelane and readlane pseudos for SGPR spilling (PR #69923)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 23 05:39:45 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Christudasan Devadasan (cdevadas)
For a future patch, it is important to keep the lowered SGPR spills recognizable as spill instructions during regalloc. Lowering them directly into V_WRITELANE/V_READLANE won't allow us to attach the SPILL flag to these instructions.
This patch introduces readlane/writelane pseudo instructions with the SGPRSpill flag set in their Desc. They are lowered to the hardware instructions later, during post-RA pseudo expansion.
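To illustrate why the flag matters, here is a minimal sketch (not part of the patch) assuming the existing `SIInstrInfo::isSGPRSpill` TSFlags helper: because the new pseudos are defined under `let SGPRSpill = 1`, passes that run between SGPR spill lowering and post-RA pseudo expansion can keep identifying them as spills, whereas a raw V_WRITELANE_B32/V_READLANE_B32 would fail this check.

```cpp
#include "SIInstrInfo.h" // AMDGPU target header providing SIInstrFlags and SIInstrInfo

// Hypothetical helper for a pass inspecting lowered SGPR spills. The
// SI_WRITELANE_PSEUDO / SI_READLANE_PSEUDO instructions carry the SGPRSpill
// bit in their Desc, so this returns true for them; the hardware
// V_WRITELANE_B32 / V_READLANE_B32 they expand to post-RA do not set it.
static bool isLoweredSGPRSpill(const llvm::MachineInstr &MI) {
  return llvm::SIInstrInfo::isSGPRSpill(MI);
}
```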
---
Patch is 313.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69923.diff
34 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (+5-7)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+16-2)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+19)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir (+5-4)
- (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir (+8-6)
- (modified) llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill.mir (+445-441)
- (modified) llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir (+106-106)
- (modified) llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir (+20-18)
- (modified) llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll (-8)
- (modified) llvm/test/CodeGen/AMDGPU/spill192.mir (+14-12)
- (modified) llvm/test/CodeGen/AMDGPU/spill224.mir (+16-14)
- (modified) llvm/test/CodeGen/AMDGPU/spill288.mir (+20-18)
- (modified) llvm/test/CodeGen/AMDGPU/spill320.mir (+22-20)
- (modified) llvm/test/CodeGen/AMDGPU/spill352.mir (+24-22)
- (modified) llvm/test/CodeGen/AMDGPU/spill384.mir (+26-24)
- (modified) llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir (-18)
- (modified) llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir (+5-4)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5de993ec3cba1d8..1d855d9bd83157d 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -274,7 +274,7 @@ class PrologEpilogSGPRSpillBuilder {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_WRITELANE_PSEUDO), Spill[I].VGPR)
.addReg(SubReg)
.addImm(Spill[I].Lane)
.addReg(Spill[I].VGPR, RegState::Undef);
@@ -319,7 +319,7 @@ class PrologEpilogSGPRSpillBuilder {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_READLANE_PSEUDO), SubReg)
.addReg(Spill[I].VGPR)
.addImm(Spill[I].Lane);
}
@@ -1554,12 +1554,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
// TODO: Handle this elsewhere at an early point. Walking through all MBBs
// here would be a bad heuristic. A better way should be by calling
// allocateWWMSpill during the regalloc pipeline whenever a physical
- // register is allocated for the intended virtual registers. That will
- // also help excluding the general use of WRITELANE/READLANE intrinsics
- // that won't really need any such special handling.
- if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
+ // register is allocated for the intended virtual registers.
+ if (MI.getOpcode() == AMDGPU::SI_WRITELANE_PSEUDO)
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
- else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
+ else if (MI.getOpcode() == AMDGPU::SI_READLANE_PSEUDO)
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
NeedExecCopyReservedReg = true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ff7b462f0f3295..981dd890fb13a41 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2090,6 +2090,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
break;
+ case AMDGPU::SI_WRITELANE_PSEUDO:
+ MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
+ break;
+
+ case AMDGPU::SI_READLANE_PSEUDO:
+ MI.setDesc(get(AMDGPU::V_READLANE_B32));
+ break;
+
case AMDGPU::V_MOV_B64_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -3907,7 +3915,9 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// However, executing them with EXEC = 0 causes them to operate on undefined
// data, which we avoid by returning true here.
if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
- Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
+ Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
+ Opcode == AMDGPU::SI_READLANE_PSEUDO ||
+ Opcode == AMDGPU::SI_WRITELANE_PSEUDO)
return true;
return false;
@@ -4301,7 +4311,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
if (SIInstrInfo::isVALU(MI)) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
+ case AMDGPU::SI_READLANE_PSEUDO:
case AMDGPU::V_WRITELANE_B32:
+ case AMDGPU::SI_WRITELANE_PSEUDO:
return false;
}
@@ -8970,7 +8982,9 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::NeverUniform;
unsigned opcode = MI.getOpcode();
- if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+ if (opcode == AMDGPU::V_READLANE_B32 ||
+ opcode == AMDGPU::V_READFIRSTLANE_B32 ||
+ opcode == AMDGPU::SI_READLANE_PSEUDO)
return InstructionUniformity::AlwaysUniform;
if (isCopyInstr(MI)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 567f1b812c1808c..6929dc4f4ab91cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -875,6 +875,25 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
+let SGPRSpill = 1 in {
+def SI_WRITELANE_PSEUDO : PseudoInstSI <(outs VGPR_32:$vdst),
+ (ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let VALU = 1;
+ let Constraints = "$vdst = $vdst_in";
+}
+
+def SI_READLANE_PSEUDO : PseudoInstSI <(outs SReg_32:$sdst),
+ (ins VGPR_32:$src0, i32imm:$src1)> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let VALU = 1;
+}
+} // End SGPRSpill = 1
+
// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
// needs to be used and an extra instruction to move between VGPR and AGPR.
// UsesTmp adds to the total size of an expanded spill in this case.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index d0a81673d6528c2..ed81fac6886adca 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1769,7 +1769,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
- SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ SB.TII.get(AMDGPU::SI_WRITELANE_PSEUDO), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR);
@@ -1815,7 +1815,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::SI_WRITELANE_PSEUDO),
SB.TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PVD.PerVGPR)
@@ -1877,8 +1877,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SpilledReg Spill = VGPRSpills[i];
- auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
- SubReg)
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::SI_READLANE_PSEUDO), SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
if (SB.NumSubRegs > 1 && i == 0)
@@ -1911,7 +1911,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
- SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
+ SB.TII.get(AMDGPU::SI_READLANE_PSEUDO), SubReg)
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
if (SB.NumSubRegs > 1 && i == 0)
diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index 7209d160e6c8a7a..3e56e49bf31d5cb 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -60,8 +60,8 @@ body: |
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
- ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
- ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
+ ; GCN-NEXT: $vgpr40 = SI_WRITELANE_PSEUDO $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+ ; GCN-NEXT: $vgpr40 = SI_WRITELANE_PSEUDO $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
@@ -124,8 +124,8 @@ body: |
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
- $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
- $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
+ $vgpr40 = SI_WRITELANE_PSEUDO $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
+ $vgpr40 = SI_WRITELANE_PSEUDO killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
%8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index aed642d1f0670e1..5226f7cfad80770 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -19,10 +19,10 @@ body: |
; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0
- ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0
- ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr46, 2, $vgpr0
- ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr47, 3, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_WRITELANE_PSEUDO $sgpr42, 0, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_WRITELANE_PSEUDO $sgpr43, 1, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_WRITELANE_PSEUDO $sgpr46, 2, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_WRITELANE_PSEUDO $sgpr47, 3, $vgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
index f802e1b1e18af5d..63e2f085bb1972c 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
@@ -26,9 +26,9 @@ body: |
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr0 = SI_WRITELANE_PSEUDO $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
+ ; GCN-NEXT: $sgpr4 = SI_READLANE_PSEUDO $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: KILL killed renamable $vgpr0
@@ -77,9 +77,9 @@ body: |
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr0 = SI_WRITELANE_PSEUDO $sgpr6, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 0
+ ; GCN-NEXT: $sgpr6 = SI_READLANE_PSEUDO $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
@@ -143,9 +143,9 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr0 = SI_WRITELANE_PSEUDO $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
+ ; GCN-NEXT: $sgpr4 = SI_READLANE_PSEUDO $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
@@ -245,9 +245,9 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr0 = SI_WRITELANE_PSEUDO $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
+ ; GCN-NEXT: $sgpr4 = SI_READLANE_PSEUDO $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: KILL killed renamable $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index 33e766ad3bf9e18..217b94b5351242c 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -36,7 +36,7 @@ body: |
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+ ; CHECK-NEXT: $vgpr2 = SI_WRITELANE_PSEUDO $sgpr4, 0, undef $vgpr2
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
@@ -50,7 +50,7 @@ body: |
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+ ; CHECK-NEXT: $sgpr4 = SI_READLANE_PSEUDO $vgpr2, 0
; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
index 8cffede47704524..ec67719e86a7827 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
@@ -32,7 +32,7 @@ body: |
; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
+ ; MUBUF-NEXT: $vgpr2 = SI_WRITELANE_PSEUDO $sgpr4, 0, undef $vgpr2
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
@@ -40,7 +40,7 @@ body: |
; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; MUBUF-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
+ ; MUBUF-NEXT: $sgpr4 = SI_READLANE_PSEUDO $vgpr2, 0
; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
@@ -48,6 +48,7 @@ body: |
; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
+ ;
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
; FLATSCR: liveins: $vgpr1, $vgpr2
; FLATSCR-NEXT: {{ $}}
@@ -58,7 +59,7 @@ body: |
; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/69923