[llvm] [AMDGPU] Add mark last scratch load pass (PR #75512)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 10:20:32 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
---
Patch is 307.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75512.diff
68 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetPassConfig.h (+3)
- (modified) llvm/lib/CodeGen/TargetPassConfig.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3)
- (added) llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp (+143)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+6)
- (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+5-3)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/nsa-reassign.mir (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir (+24-24)
- (modified) llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/pei-build-spill.mir (+18-18)
- (modified) llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/pr51516.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/ra-inserted-scalar-instructions.mir (+22-22)
- (modified) llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/remat-dead-subreg.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/remat-smrd.mir (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/remat-sop.mir (+16-16)
- (modified) llvm/test/CodeGen/AMDGPU/remat-vop.mir (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill.mir (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/spill-agpr.mir (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/spill192.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/spill224.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/spill288.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/spill320.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/spill352.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/spill384.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/splitkit.mir (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir (+8-8)
- (added) llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.ll (+231)
- (added) llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir (+303)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir (+1-1)
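At a glance, the patch does three things: it adds a generic `addPreStackSlotColoring()` hook to `TargetPassConfig`, it gives the `SI_SPILL_*_RESTORE` pseudos a new `last_use` immediate operand (the source of the mechanical `, 0` churn across the MIR tests), and it adds a GFX12-only machine pass that sets that operand on the final load from each scratch slot. For example, this restore from the first test hunk below gains a trailing `last_use` immediate:

```
# Before the patch:
$agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
# After: a trailing last_use immediate, initialized to 0 here:
$agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
```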
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 66365419aa330b..67c50236832f21 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -423,6 +423,9 @@ class TargetPassConfig : public ImmutablePass {
/// to physical registers.
virtual void addPostRewrite() { }
+  /// Add passes to be run immediately before the Stack Slot Coloring pass.
+ virtual void addPreStackSlotColoring() {}
+
/// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index f15d59d4e35b49..7a57cca0439b25 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1491,6 +1491,8 @@ void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&MachineSchedulerID);
if (addRegAssignAndRewriteOptimized()) {
+ addPreStackSlotColoring();
+
// Perform stack slot coloring and post-ra machine LICM.
addPass(&StackSlotColoringID);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 89319527c410a6..0de0e732693954 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -166,6 +166,9 @@ extern char &SILowerI1CopiesID;
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
extern char &AMDGPUGlobalISelDivergenceLoweringID;
+void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
+extern char &AMDGPUMarkLastScratchLoadID;
+
void initializeSILowerSGPRSpillsPass(PassRegistry &);
extern char &SILowerSGPRSpillsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
new file mode 100644
index 00000000000000..4c9d417760bd36
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -0,0 +1,143 @@
+//===-- AMDGPUMarkLastScratchLoad.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Mark scratch load/spill instructions that are guaranteed to be the last use
+// of a scratch slot, so that the slot can be evicted from caches afterwards.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineOperand.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-mark-last-scratch-load"
+
+namespace {
+
+class AMDGPUMarkLastScratchLoad : public MachineFunctionPass {
+private:
+ LiveStacks *LS = nullptr;
+ SlotIndexes *SI = nullptr;
+ const SIInstrInfo *SII = nullptr;
+
+public:
+ static char ID;
+
+ AMDGPUMarkLastScratchLoad() : MachineFunctionPass(ID) {
+ initializeAMDGPUMarkLastScratchLoadPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return "AMDGPU Mark Last Scratch Load";
+ }
+};
+
+} // end anonymous namespace
+
+bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG({
+ dbgs() << "********** Mark Last Scratch Load **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (ST.getGeneration() < AMDGPUSubtarget::GFX12)
+ return false;
+
+ LS = &getAnalysis<LiveStacks>();
+ SI = &getAnalysis<SlotIndexes>();
+ SII = ST.getInstrInfo();
+
+ const unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0) {
+ LLVM_DEBUG(dbgs() << "No live slots, skipping\n");
+ return false;
+ }
+
+  LLVM_DEBUG(dbgs() << NumSlots << " intervals\n");
+
+ bool Changed = false;
+
+ for (auto &[SS, LI] : *LS) {
+ LLVM_DEBUG(dbgs() << "Checking interval: " << LI << "\n");
+
+ for (const LiveRange::Segment &Segment : LI.segments) {
+ LLVM_DEBUG(dbgs() << " Checking segment: " << Segment << "\n");
+
+      // Ignore segments that run to the end of the basic block; in that case
+      // the slot is still live at the end of the block.
+ if (Segment.end.isBlock())
+ continue;
+
+ const int FrameIndex = Register::stackSlot2Index(LI.reg());
+ MachineInstr *LastLoad = nullptr;
+
+ MachineInstr *MISegmentStart = SI->getInstructionFromIndex(Segment.start);
+ MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end);
+ if (!MISegmentEnd) {
+ // FIXME: The start and end can refer to deleted instructions. We should
+ // be able to handle this more gracefully by finding the closest real
+ // instructions.
+ continue;
+ }
+ MachineBasicBlock *BB = MISegmentEnd->getParent();
+
+      // Iterate backwards from the segment end to the start of the basic
+      // block, or to the segment start if it lies in the same block.
+ auto End = BB->instr_rend();
+ if (MISegmentStart && MISegmentStart->getParent() == BB)
+ End = MISegmentStart->getReverseIterator();
+
+ for (auto MI = MISegmentEnd->getReverseIterator(); MI != End; ++MI) {
+ int LoadFI = 0;
+
+ if (SII->isLoadFromStackSlot(*MI, LoadFI) && LoadFI == FrameIndex) {
+ LastLoad = &*MI;
+ break;
+ }
+ }
+
+ if (LastLoad) {
+ MachineOperand *LastUse =
+ SII->getNamedOperand(*LastLoad, AMDGPU::OpName::last_use);
+ assert(LastUse && "This instruction must have a last_use operand");
+ LastUse->setImm(1);
+ Changed = true;
+        LLVM_DEBUG(dbgs() << " Found last load: " << *LastLoad);
+ }
+ }
+ }
+
+ return Changed;
+}
+
+char AMDGPUMarkLastScratchLoad::ID = 0;
+
+char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoad::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+ "AMDGPU Mark last scratch load", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+ "AMDGPU Mark last scratch load", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0e0094cb9cd6ef..68347c9b9f5d26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -377,6 +377,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILowerI1CopiesPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
+ initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@@ -955,6 +956,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
void addPreRegAlloc() override;
bool addPreRewrite() override;
+ void addPreStackSlotColoring() override;
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -1337,6 +1339,10 @@ bool GCNPassConfig::addPreRewrite() {
return true;
}
+void GCNPassConfig::addPreStackSlotColoring() {
+ addPass(&AMDGPUMarkLastScratchLoadID);
+}
+
FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
// Initialize the global default.
llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 2c92e7a0738855..9a974eaf50d235 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUMCInstLower.cpp
AMDGPUIGroupLP.cpp
AMDGPUInsertSingleUseVDST.cpp
+ AMDGPUMarkLastScratchLoad.cpp
AMDGPUMIRFormatter.cpp
AMDGPUOpenCLEnqueuedBlockLowering.cpp
AMDGPUPerfHintAnalysis.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d9256..2f7dfa702cb9c8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1973,9 +1973,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
BuildMI(MBB, MI, DL, OpDesc, DestReg)
- .addFrameIndex(FrameIndex) // addr
- .addMemOperand(MMO)
- .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
+ .addFrameIndex(FrameIndex) // addr
+        .addMemOperand(MMO)
+ .addImm(0) // last_use
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
return;
}
@@ -1986,6 +1987,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
+ .addImm(0) // last_use
.addMemOperand(MMO);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9bc623abcd04b..85db33906e46ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -899,7 +899,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
def _RESTORE : PseudoInstSI <
(outs sgpr_class:$data),
- (ins i32imm:$addr)> {
+ (ins i32imm:$addr, i32imm:$last_use)> {
let mayStore = 0;
let mayLoad = 1;
}
@@ -968,7 +968,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> {
def _RESTORE : VPseudoInstSI <
(outs vgpr_class:$vdata),
(ins i32imm:$vaddr,
- SReg_32:$soffset, i32imm:$offset)> {
+ SReg_32:$soffset, i32imm:$offset, i32imm:$last_use)> {
let mayStore = 0;
let mayLoad = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 53540e4a000492..12fd66b6806f5f 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -1007,7 +1007,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
+ $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -2027,7 +2027,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
@@ -3052,7 +3052,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
S_CMP_EQ_U32 0, 0, implicit-def $scc
- $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.1, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
S_CBRANCH_SCC1 %bb.2, implicit $scc
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
index 0d566efcbc91cf..16bb8a89b95af5 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir
@@ -51,7 +51,7 @@ body: |
; GFX908-NEXT: S_ENDPGM 0
$agpr0_agpr1 = IMPLICIT_DEF
SI_SPILL_AV64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ $agpr0_agpr1 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
$agpr2_agpr3 = COPY $agpr0_agpr1, implicit $exec
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
index 3ed2cb856eaea8..1589d2dcf44467 100644
--- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
@@ -12,7 +12,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec
; REGALLOC-NEXT: renamable $sgpr6 = IMPLICIT_DEF
@@ -31,11 +31,11 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.1.Flow:
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC...
[truncated]
``````````
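Since the new pass is registered under its `DEBUG_TYPE` string, `amdgpu-mark-last-scratch-load`, it can be exercised on its own over MIR. A minimal sketch of such an invocation, assuming a GFX12 `-mcpu` value such as `gfx1200` (the exact CPU name is an assumption here; the pass returns early for anything below `AMDGPUSubtarget::GFX12`):

```
# Sketch: run only the new pass over a hand-written MIR file.
llc -mtriple=amdgcn -mcpu=gfx1200 \
    -run-pass=amdgpu-mark-last-scratch-load -o - input.mir
```

On each qualifying restore the pass flips the trailing `last_use` immediate from `0` to `1`; loads from slots that are still live at the end of their basic block are deliberately left unmarked.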
https://github.com/llvm/llvm-project/pull/75512