[llvm] Fix vgpr live range (PR #92940)
Gang Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 10:23:19 PDT 2024
https://github.com/cmc-rep created https://github.com/llvm/llvm-project/pull/92940
None
>From e3aacdff49e331a1cd95376405b3172ec734a6aa Mon Sep 17 00:00:00 2001
From: gangc <gangc at amd.com>
Date: Tue, 30 Apr 2024 13:38:25 -0700
Subject: [PATCH 1/2] [CodeGen] change prototype of RegClassFilterFunc
Change the prototype of RegClassFilterFunc so that the filter is no longer
limited to the register class. We need to implement more complicated
filters based on other information associated with each register.
Signed-off-by: gangc <gangc at amd.com>
---
llvm/include/llvm/CodeGen/RegAllocCommon.h | 10 ++++++----
llvm/lib/CodeGen/RegAllocBase.cpp | 3 +--
llvm/lib/CodeGen/RegAllocFast.cpp | 3 +--
llvm/lib/CodeGen/RegAllocGreedy.cpp | 6 +++---
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 13 ++++++++-----
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6 ++++--
llvm/lib/Target/X86/X86TargetMachine.cpp | 6 ++++--
7 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h
index 757ca8e112eec..943c9dd9e9bf5 100644
--- a/llvm/include/llvm/CodeGen/RegAllocCommon.h
+++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h
@@ -10,22 +10,24 @@
#define LLVM_CODEGEN_REGALLOCCOMMON_H
#include <functional>
+#include <llvm/CodeGen/Register.h>
namespace llvm {
-class TargetRegisterClass;
class TargetRegisterInfo;
+class MachineRegisterInfo;
typedef std::function<bool(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC)> RegClassFilterFunc;
+ const MachineRegisterInfo &MRI, const Register Reg)>
+ RegClassFilterFunc;
/// Default register class filter function for register allocation. All virtual
/// registers should be allocated.
static inline bool allocateAllRegClasses(const TargetRegisterInfo &,
- const TargetRegisterClass &) {
+ const MachineRegisterInfo &,
+ const Register) {
return true;
}
-
}
#endif // LLVM_CODEGEN_REGALLOCCOMMON_H
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index d0dec372f6896..a4645ed93029d 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -181,8 +181,7 @@ void RegAllocBase::enqueue(const LiveInterval *LI) {
if (VRM->hasPhys(Reg))
return;
- const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- if (ShouldAllocateClass(*TRI, RC)) {
+ if (ShouldAllocateClass(*TRI, *MRI, Reg)) {
LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n');
enqueueImpl(LI);
} else {
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 6740e1f0edb4f..f6419daba6a2d 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -417,8 +417,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
assert(Reg.isVirtual());
- const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- return ShouldAllocateClass(*TRI, RC);
+ return ShouldAllocateClass(*TRI, *MRI, Reg);
}
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 348277224c7ae..c3d5984b46f51 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2306,9 +2306,9 @@ void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
if (Reg.isPhysical())
continue;
- // This may be a skipped class
+ // This may be a skipped register
if (!VRM->hasPhys(Reg)) {
- assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
+ assert(!ShouldAllocateClass(*TRI, *MRI, Reg) &&
"We have an unallocated variable which should have been handled");
continue;
}
@@ -2698,7 +2698,7 @@ bool RAGreedy::hasVirtRegAlloc() {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
if (!RC)
continue;
- if (ShouldAllocateClass(*TRI, *RC))
+ if (ShouldAllocateClass(*TRI, *MRI, Reg))
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 305a6c8c3b926..3d6965fa9876c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -81,16 +81,19 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
};
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC) {
- return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}
static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC) {
- return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}
-
/// -{sgpr|vgpr}-regalloc=... command line option.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 0876f46728a10..44a26c48c63e0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -273,8 +273,10 @@ class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> {
};
static bool onlyAllocateRVVReg(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC) {
- return RISCVRegisterInfo::isRVVRegClass(&RC);
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return RISCVRegisterInfo::isRVVRegClass(RC);
}
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 86b456019c4e5..eab537e8a5f8b 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -652,8 +652,10 @@ std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
}
static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC) {
- return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(&RC);
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(RC);
}
bool X86PassConfig::addRegAssignAndRewriteOptimized() {
>From e56920687e6504363ab4c2611b016e672b348286 Mon Sep 17 00:00:00 2001
From: gangc <gangc at amd.com>
Date: Wed, 15 May 2024 14:11:23 -0700
Subject: [PATCH 2/2] [AMDGPU] fix physical register live-range after
per-wave-RA
Use LivePhysRegs to update liveness after adding implicit defs and uses.
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +
.../AMDGPU/AMDGPUFixLiveRangePreWaveRA.cpp | 258 ++++++++++++++++++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 +
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
4 files changed, 264 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUFixLiveRangePreWaveRA.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6016bd5187d88..8367c33338d26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -44,6 +44,7 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangePass();
FunctionPass *createSIFixSGPRCopiesPass();
+FunctionPass *createAMDGPUFixLiveRangePreWaveRAPass();
FunctionPass *createLowerWWMCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
@@ -170,6 +171,9 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeAMDGPUFixLiveRangePreWaveRAPass(PassRegistry &);
+extern char &AMDGPUFixLiveRangePreWaveRAID;
+
void initializeSILowerWWMCopiesPass(PassRegistry &);
extern char &SILowerWWMCopiesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFixLiveRangePreWaveRA.cpp b/llvm/lib/Target/AMDGPU/AMDGPUFixLiveRangePreWaveRA.cpp
new file mode 100644
index 0000000000000..4203a5c568bae
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFixLiveRangePreWaveRA.cpp
@@ -0,0 +1,258 @@
+//===- AMDGPUFixLiveRangePreWaveRA.cpp - Fix Phy-VGPR live-ranges ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass assumes that we have done register-allocation for per-thread
+/// values. It extends the live-ranges of those physical VGPRs in order to
+/// create the correct interference with those WWM/WQM values during the last
+/// register-allocation pass for those WWM/WQM values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-live-range-pre-wave-ra"
+
+namespace {
+
+/// Machine-function pass that adds implicit register operands around
+/// VCC/EXEC-conditioned branches for physical non-SGPR registers and then
+/// recomputes block live-ins and liveness flags (see runOnMachineFunction).
+class AMDGPUFixLiveRangePreWaveRA : public MachineFunctionPass {
+private:
+  // Per-function state; every pointer is (re)assigned at the top of
+  // runOnMachineFunction and is null before the first run.
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  MachinePostDominatorTree *PDT = nullptr;
+
+  // Maps a block to the set of blocks recorded as its control parents by
+  // buildControlDependences(). Cleared at the end of runOnMachineFunction.
+  DenseMap<MachineBasicBlock *, SmallPtrSet<MachineBasicBlock *, 2>> CtrlDeps;
+
+  /// Populates CtrlDeps for \p MF using the post-dominator tree.
+  void buildControlDependences(MachineFunction &MF);
+
+  /// Returns true if \p CtrlMBB is reachable from \p DepMBB by following
+  /// CtrlDeps entries.
+  bool influences(MachineBasicBlock *CtrlMBB, MachineBasicBlock *DepMBB);
+
+public:
+  static char ID;
+
+  AMDGPUFixLiveRangePreWaveRA() : MachineFunctionPass(ID) {
+    initializeAMDGPUFixLiveRangePreWaveRAPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Requires the machine post-dominator tree and declares the CFG preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachinePostDominatorTree>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(AMDGPUFixLiveRangePreWaveRA, DEBUG_TYPE,
+ "SI Fix Live Range before Wave-RA", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(AMDGPUFixLiveRangePreWaveRA, DEBUG_TYPE,
+ "SI Fix Live Range before Wave-RA", false, false)
+
+char AMDGPUFixLiveRangePreWaveRA::ID = 0;
+
+char &llvm::AMDGPUFixLiveRangePreWaveRAID = AMDGPUFixLiveRangePreWaveRA::ID;
+
+FunctionPass *llvm::createAMDGPUFixLiveRangePreWaveRAPass() {
+ return new AMDGPUFixLiveRangePreWaveRA();
+}
+
+/// Returns true when \p MBB contains at least one instruction whose opcode is
+/// V_SET_INACTIVE_B32, V_SET_INACTIVE_B64, SI_SPILL_S32_TO_VGPR,
+/// ENTER_STRICT_WWM, ENTER_STRICT_WQM or ENTER_PSEUDO_WM.
+/// NOTE(review): nothing in this file calls this helper as posted.
+static bool MBBHasWWM(const MachineBasicBlock &MBB) {
+  for (const MachineInstr &Inst : MBB) {
+    switch (Inst.getOpcode()) {
+    case AMDGPU::V_SET_INACTIVE_B32:
+    case AMDGPU::V_SET_INACTIVE_B64:
+    case AMDGPU::SI_SPILL_S32_TO_VGPR:
+    case AMDGPU::ENTER_STRICT_WWM:
+    case AMDGPU::ENTER_STRICT_WQM:
+    case AMDGPU::ENTER_PSEUDO_WM:
+      return true;
+    default:
+      break;
+    }
+  }
+  return false;
+}
+
+/// Fills CtrlDeps: for every block of \p MF that does not have a single
+/// successor, each block on the post-dominator-tree path from one of its
+/// successors up to (but excluding) a stopping block gets that branching block
+/// recorded as a control parent. Relies on PDT having been set by the caller.
+void AMDGPUFixLiveRangePreWaveRA::buildControlDependences(MachineFunction &MF) {
+ for (auto *MBB : nodes(&MF)) {
+ // Blocks for which getSingleSuccessor() returns a block are skipped.
+ if (MBB->getSingleSuccessor())
+ continue;
+
+ // For each successor of MBB
+ for (auto *SuccMBB : MBB->successors()) {
+ // Stopping block: the nearest common post-dominator of MBB and SuccMBB.
+ // If that is MBB itself, use MBB's immediate post-dominator instead
+ // (when the tree has one).
+ auto *PostDomMBB = PDT->findNearestCommonDominator(MBB, SuccMBB);
+ if (PostDomMBB == MBB) {
+ if (auto *ParentNode = PDT->getNode(MBB)->getIDom())
+ PostDomMBB = ParentNode->getBlock();
+ }
+ // walk PDT from SuccMBB to PostDomMBB
+ // add MBB as the control-parent of the blocks along the path (except
+ // PostDomMBB)
+ for (auto *Node = PDT->getNode(SuccMBB);
+ Node && Node->getBlock() != PostDomMBB; Node = Node->getIDom()) {
+ auto *PathMBB = Node->getBlock();
+ CtrlDeps[PathMBB].insert(MBB);
+ }
+ }
+ }
+}
+
+/// Returns true if \p CtrlMBB is a direct or transitive control parent of
+/// \p DepMBB according to CtrlDeps, i.e. it can be reached from \p DepMBB by
+/// following CtrlDeps entries one or more times. Returns false when \p DepMBB
+/// has no recorded control parents.
+bool AMDGPUFixLiveRangePreWaveRA::influences(MachineBasicBlock *CtrlMBB,
+                                             MachineBasicBlock *DepMBB) {
+  auto DepIt = CtrlDeps.find(DepMBB);
+  if (DepIt == CtrlDeps.end())
+    return false;
+
+  // Seed the worklist with the direct control parents of DepMBB.
+  SmallVector<MachineBasicBlock *, 8> WL;
+  SmallPtrSet<MachineBasicBlock *, 8> Visited;
+  for (MachineBasicBlock *ParMBB : DepIt->second)
+    WL.push_back(ParMBB);
+
+  while (!WL.empty()) {
+    MachineBasicBlock *MBB = WL.pop_back_val();
+    if (MBB == CtrlMBB)
+      return true;
+    // A block may have been queued more than once; expand it only once.
+    if (!Visited.insert(MBB).second)
+      continue;
+
+    // Continue the search through the control parents of the block just
+    // popped (not those of DepMBB, which were already queued above).
+    auto ParIt = CtrlDeps.find(MBB);
+    if (ParIt == CtrlDeps.end())
+      continue;
+    for (MachineBasicBlock *ParMBB : ParIt->second) {
+      if (!Visited.count(ParMBB))
+        WL.push_back(ParMBB);
+    }
+  }
+
+  return false;
+}
+
+/// Entry point of the pass.
+///
+/// For every used, non-reserved physical register whose base class is not an
+/// SGPR class, and for every block ending in a branch conditioned on
+/// VCC/EXEC (or their LO/HI halves): if the register is live into the block's
+/// immediate post-dominator, each defining instruction located in a block
+/// influenced by the branch block gets an implicit use of the register. When
+/// at least one such use was added and the register is live into neither
+/// branch target, the branch additionally gets an implicit def. Block live-ins
+/// and liveness flags are recomputed if anything changed.
+///
+/// \returns true if any operand was added.
+bool AMDGPUFixLiveRangePreWaveRA::runOnMachineFunction(MachineFunction &MF) {
+  LLVM_DEBUG(dbgs() << "AMDGPUFixLiveRangePreWaveRA: function " << MF.getName()
+                    << "\n");
+
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  TII = ST.getInstrInfo();
+  TRI = &TII->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  PDT = &getAnalysis<MachinePostDominatorTree>();
+
+  buildControlDependences(MF);
+
+  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+  bool Changed = false;
+  for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) {
+    for (MCRegUnitRootIterator Root(i, TRI); Root.isValid(); ++Root) {
+      const auto *RC = TRI->getPhysRegBaseClass(*Root);
+      // Skip roots without a base class and SGPR roots: not relevant here.
+      if (!RC || TRI->isSGPRClass(RC))
+        continue;
+
+      for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
+        // if a reg is either not-seen or reserved, not a concern for RA
+        if (MRI->reg_empty(Reg) || MRI->isReserved(Reg))
+          continue;
+
+        // iterate through the CFG, processing every divergent branch
+        for (MachineBasicBlock *MBB : RPOT) {
+          MachineBasicBlock *TrueMBB = nullptr;
+          MachineBasicBlock *FalseMBB = nullptr;
+          SmallVector<MachineOperand, 1> Cond;
+          // analyzeBranch reports failure by returning true; its outputs
+          // must not be used in that case.
+          if (TII->analyzeBranch(*MBB, TrueMBB, FalseMBB, Cond))
+            continue;
+
+          // No conditional branch in this block: move on to the next block
+          // instead of abandoning the whole traversal.
+          if (Cond.empty())
+            continue;
+
+          auto CondOpnd = Cond.back();
+          if (!FalseMBB)
+            FalseMBB = MBB->getNextNode();
+
+          // check if this is a divergent branch
+          // is this the right way?
+          const Register CondReg = CondOpnd.getReg();
+          if (CondReg != AMDGPU::VCC && CondReg != AMDGPU::VCC_LO &&
+              CondReg != AMDGPU::VCC_HI && CondReg != AMDGPU::EXEC &&
+              CondReg != AMDGPU::EXEC_LO && CondReg != AMDGPU::EXEC_HI)
+            continue;
+
+          // Find the join point. Any step of the lookup may yield null (for
+          // instance when the immediate post-dominator is not a real block),
+          // in which case there is nothing to check for this branch.
+          auto *PDNode = PDT->getNode(MBB);
+          auto *IPDNode = PDNode ? PDNode->getIDom() : nullptr;
+          MachineBasicBlock *IPD = IPDNode ? IPDNode->getBlock() : nullptr;
+          // is register live at the join-point
+          if (!IPD || !IPD->isLiveIn(Reg))
+            continue;
+
+          auto CBR = CondOpnd.getParent();
+
+          // Snapshot the defining instructions before touching them:
+          // addOperand() below may relocate an instruction's operands, which
+          // must not happen while walking the register's operand list.
+          SmallVector<MachineInstr *, 8> DefMIs;
+          for (MachineInstr &MI : MRI->def_instructions(Reg))
+            DefMIs.push_back(&MI);
+
+          // add implicit use if a def is inside the influence region
+          bool UseAdded = false;
+          for (MachineInstr *MI : DefMIs) {
+            if (!influences(MBB, MI->getParent()))
+              continue;
+
+            bool UseExists = false;
+            for (const MachineOperand &Opnd : MI->all_uses()) {
+              if (Opnd.isReg() && Opnd.getReg() == Reg) {
+                UseExists = true;
+                break;
+              }
+            }
+            if (UseExists)
+              continue;
+
+            MI->addOperand(MF, MachineOperand::CreateReg(Reg, /*isDef=*/false,
+                                                         /*isImp=*/true));
+            UseAdded = true;
+            Changed = true;
+          }
+
+          // add implicit def to branch in order to cap the liveness
+          if (UseAdded && !FalseMBB->isLiveIn(Reg) && !TrueMBB->isLiveIn(Reg)) {
+            bool DefExists = false;
+            for (const MachineOperand &Opnd : CBR->all_defs()) {
+              if (Opnd.isReg() && Opnd.getReg() == Reg) {
+                DefExists = true;
+                break;
+              }
+            }
+            if (!DefExists) {
+              CBR->addOperand(MF, MachineOperand::CreateReg(Reg, /*isDef=*/true,
+                                                            /*isImp=*/true));
+              // should we try to merge implicit-def to make MIR concise?
+            }
+          }
+        } // end the block-loop
+      }   // end the reg-loop
+    }     // end the root-loop
+  }       // end of the unit-loop
+
+  if (Changed) {
+    // recompute liveness
+    std::vector<MachineBasicBlock *> PostOrder;
+    for (auto MBB : reverse(RPOT)) {
+      PostOrder.push_back(MBB);
+    }
+    fullyRecomputeLiveIns(PostOrder);
+    for (auto *MBB : RPOT) {
+      recomputeLivenessFlags(*MBB);
+    }
+  }
+  CtrlDeps.clear();
+  return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 3d6965fa9876c..59cb034889fce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -397,6 +397,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
+ initializeAMDGPUFixLiveRangePreWaveRAPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 48325a0928f93..06b74fd20ffd9 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -139,6 +139,7 @@ add_llvm_target(AMDGPUCodeGen
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
SIFixVGPRCopies.cpp
+ AMDGPUFixLiveRangePreWaveRA.cpp
SIFoldOperands.cpp
SIFormMemoryClauses.cpp
SIFrameLowering.cpp
More information about the llvm-commits
mailing list