[llvm] [NFC][AMDGPU] Make AMDGPUSplitModule a ModulePass (PR #95773)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 05:32:42 PDT 2024
https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/95773
This allows the pass to access TTI correctly and opens the door to accessing more analyses in the future.
I went back and forth between this and also making the default SplitModule a pass to keep things uniform, but I decided against the latter because it would only add needless complication. Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained.
>From 4df0af5b1df43818c8f3eb55fdaf236f4ba8f626 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 17 Jun 2024 14:31:39 +0200
Subject: [PATCH] [NFC][AMDGPU] Make AMDGPUSplitModule a ModulePass
This allows the pass to access TTI correctly and opens the door to accessing more analyses in the future.
I went back and forth between this and also making the default SplitModule a pass to keep things uniform, but I decided against the latter because it would only add needless complication.
Neither llvm-split nor LTOBackend has a pass manager (PM) ready to use, so we need to create one anyway. Let's keep all the mess hidden in the AMDGPU version for now to keep this change more self-contained.
---
llvm/include/llvm/Target/TargetMachine.h | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 34 +++++++++++++------
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h | 21 ++++++++----
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 20 +++++++++--
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 +-
5 files changed, 58 insertions(+), 21 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 1ba99730ca702..e72045391bf31 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -431,7 +431,7 @@ class TargetMachine {
/// and \p M has not been modified.
virtual bool splitModule(
Module &M, unsigned NumParts,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
return false;
}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 2449fa581842a..3033b7f58f1a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -98,6 +98,7 @@ static cl::opt<bool>
using CostType = InstructionCost::CostType;
using PartitionID = unsigned;
+using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
static bool isEntryPoint(const Function *F) {
return AMDGPU::isEntryFunctionCC(F->getCallingConv());
@@ -214,13 +215,12 @@ static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
/// Calculate the cost of each function in \p M
/// \param SML Log Helper
-/// \param TM TargetMachine instance used to retrieve TargetTransformInfo.
+/// \param GetTTI Abstract getter for TargetTransformInfo.
/// \param M Module to analyze.
/// \param CostMap[out] Resulting Function -> Cost map.
/// \return The module's total cost.
static CostType
-calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
- Module &M,
+calculateFunctionCosts(SplitModuleLogger &SML, GetTTIFn GetTTI, Module &M,
DenseMap<const Function *, CostType> &CostMap) {
CostType ModuleCost = 0;
CostType KernelCost = 0;
@@ -230,8 +230,7 @@ calculateFunctionCosts(SplitModuleLogger &SML, const AMDGPUTargetMachine &TM,
continue;
CostType FnCost = 0;
- TargetTransformInfo TTI = TM.getTargetTransformInfo(Fn);
-
+ const auto &TTI = GetTTI(Fn);
for (const auto &BB : Fn) {
for (const auto &I : BB) {
auto Cost =
@@ -438,8 +437,9 @@ doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts,
// assign X to a partition as usual, but when we get to Y, we check if it's
// worth also putting it in Y's partition.
const CostType LargeKernelThreshold =
- LargeKernelFactor ? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
- : std::numeric_limits<CostType>::max();
+ LargeKernelFactor
+ ? CostType(((ModuleCost / NumParts) * LargeKernelFactor))
+ : std::numeric_limits<CostType>::max();
std::vector<DenseSet<const Function *>> Partitions;
Partitions.resize(NumParts);
@@ -604,10 +604,9 @@ static void externalize(GlobalValue &GV) {
if (!GV.hasName())
GV.setName("__llvmsplit_unnamed");
}
-} // end anonymous namespace
-void llvm::splitAMDGPUModule(
- const AMDGPUTargetMachine &TM, Module &M, unsigned N,
+static void splitAMDGPUModule(
+ GetTTIFn GetTTI, Module &M, unsigned N,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
SplitModuleLogger SML(M);
@@ -648,7 +647,7 @@ void llvm::splitAMDGPUModule(
// Start by calculating the cost of every function in the module, as well as
// the module's overall cost.
DenseMap<const Function *, CostType> FnCosts;
- const CostType ModuleCost = calculateFunctionCosts(SML, TM, M, FnCosts);
+ const CostType ModuleCost = calculateFunctionCosts(SML, GetTTI, M, FnCosts);
// Gather every kernel into a WorkList, then sort it by descending total cost
// of the kernel so the biggest kernels are seen first.
@@ -742,3 +741,16 @@ void llvm::splitAMDGPUModule(
<< format("%0.2f", (float(TotalFnImpls) / FnCosts.size()) * 100)
<< "% of original module)\n";
}
+} // namespace
+
+PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+ splitAMDGPUModule(TTIGetter, M, N, ModuleCallback);
+ // We don't change the original module.
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
index 6171643bd4adc..d814dedd6f0c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.h
@@ -12,18 +12,27 @@
#define LLVM_TARGET_AMDGPUSPLITMODULE_H
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/IR/PassManager.h"
#include <memory>
namespace llvm {
-class Module;
-class AMDGPUTargetMachine;
-
/// Splits the module M into N linkable partitions. The function ModuleCallback
/// is called N times passing each individual partition as the MPart argument.
-void splitAMDGPUModule(
- const AMDGPUTargetMachine &TM, Module &M, unsigned N,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback);
+class AMDGPUSplitModulePass : public PassInfoMixin<AMDGPUSplitModulePass> {
+public:
+ using ModuleCreationCallback =
+ function_ref<void(std::unique_ptr<Module> MPart)>;
+
+ AMDGPUSplitModulePass(unsigned N, ModuleCreationCallback ModuleCallback)
+ : N(N), ModuleCallback(ModuleCallback) {}
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+private:
+ unsigned N;
+ ModuleCreationCallback ModuleCallback;
+};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce997c659094a..3e21d8ee2e2a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -829,8 +829,24 @@ AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
bool AMDGPUTargetMachine::splitModule(
Module &M, unsigned NumParts,
- function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) const {
- splitAMDGPUModule(*this, M, NumParts, ModuleCallback);
+ function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+ // FIXME(?): It would be better to reuse an existing Analysis/PassManager,
+ // but none of the current users of this API have one ready, so they would
+ // need to create one anyway. Hide the boilerplate here to keep it simple.
+
+ LoopAnalysisManager LAM;
+ FunctionAnalysisManager FAM;
+ CGSCCAnalysisManager CGAM;
+ ModuleAnalysisManager MAM;
+
+ PassBuilder PB(this);
+ PB.registerModuleAnalyses(MAM);
+ PB.registerFunctionAnalyses(FAM);
+ PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+ ModulePassManager MPM;
+ MPM.addPass(AMDGPUSplitModulePass(NumParts, ModuleCallback));
+ MPM.run(M, MAM);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 2cfd232483a8a..98b0bc034b5be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -76,7 +76,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
bool splitModule(Module &M, unsigned NumParts,
function_ref<void(std::unique_ptr<Module> MPart)>
- ModuleCallback) const override;
+ ModuleCallback) override;
};
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list