[llvm] dd094b2 - NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (#102645)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 11 04:11:13 PDT 2024
Author: Matt Arsenault
Date: 2024-08-11T15:11:10+04:00
New Revision: dd094b2647976050295384ce7e72582607090602
URL: https://github.com/llvm/llvm-project/commit/dd094b2647976050295384ce7e72582607090602
DIFF: https://github.com/llvm/llvm-project/commit/dd094b2647976050295384ce7e72582607090602.diff
LOG: NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (#102645)
This was much more difficult than I anticipated. The pass is
not in a good state, with poor test coverage. The legacy PM
does seem to be relying on maintaining the map state between
different SCCs, which seems bad. The pass is going out of its
way to avoid putting the attributes it introduces onto non-callee
functions. If it just added them, we could use them directly
instead of relying on the map, I would think.
The NewPM path uses a ModulePass; I'm not sure if we should be
using CGSCC here but there seems to be some missing infrastructure
to support backend defined ones.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/perfhint.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4de20bbf5e060..34ef19637bf97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -218,8 +218,8 @@ extern char &SIPreAllocateWWMRegsID;
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
extern char &AMDGPUImageIntrinsicOptimizerID;
-void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
-extern char &AMDGPUPerfHintAnalysisID;
+void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &);
+extern char &AMDGPUPerfHintAnalysisLegacyID;
void initializeGCNRegPressurePrinterPass(PassRegistry &);
extern char &GCNRegPressurePrinterID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8579774f52230..bbb4573655ab7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
-INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 9991ee553f1ec..4a2e459a53319 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -21,6 +21,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
AMDGPULowerBufferFatPointersPass(*this))
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
+MODULE_PASS("amdgpu-perf-hint",
+ AMDGPUPerfHintAnalysisPass(
+ *static_cast<const GCNTargetMachine *>(this)))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
#undef MODULE_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 1213d5e0b41db..040e931b82af2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -12,12 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -54,12 +57,6 @@ static cl::opt<unsigned>
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");
-char llvm::AMDGPUPerfHintAnalysis::ID = 0;
-char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;
-
-INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
- "Analysis if a function is memory bound", true, true)
-
namespace {
struct AMDGPUPerfHint {
@@ -67,7 +64,7 @@ struct AMDGPUPerfHint {
public:
AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
- const TargetLowering *TLI_)
+ const SITargetLowering *TLI_)
: FIM(FIM_), TLI(TLI_) {}
bool runOnFunction(Function &F);
@@ -97,7 +94,7 @@ struct AMDGPUPerfHint {
const DataLayout *DL = nullptr;
- const TargetLowering *TLI;
+ const SITargetLowering *TLI;
AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@@ -388,23 +385,52 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
<< Reference.print() << "Result:" << Result << '\n');
return Result;
}
+
+class AMDGPUPerfHintAnalysisLegacy : public CallGraphSCCPass {
+private:
+ // FIXME: This is relying on maintaining state between
diff erent SCCs.
+ AMDGPUPerfHintAnalysis Impl;
+
+public:
+ static char ID;
+
+ AMDGPUPerfHintAnalysisLegacy() : CallGraphSCCPass(ID) {}
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
} // namespace
-bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
+bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
+ auto FI = FIM.find(F);
+ if (FI == FIM.end())
return false;
- const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ return AMDGPUPerfHint::isMemBound(FI->second);
+}
+
+bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
+ auto FI = FIM.find(F);
+ if (FI == FIM.end())
+ return false;
+ return AMDGPUPerfHint::needLimitWave(FI->second);
+}
+
+bool AMDGPUPerfHintAnalysis::runOnSCC(const GCNTargetMachine &TM,
+ CallGraphSCC &SCC) {
bool Changed = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;
- const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
- AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+ AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
if (Analyzer.runOnFunction(*F))
Changed = true;
@@ -413,18 +439,55 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
- auto FI = FIM.find(F);
- if (FI == FIM.end())
- return false;
+bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
+ LazyCallGraph &CG) {
+ bool Changed = false;
- return AMDGPUPerfHint::isMemBound(FI->second);
+ CG.buildRefSCCs();
+
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ for (LazyCallGraph::SCC &SCC : RC) {
+ if (SCC.size() != 1)
+ continue;
+ Function &F = SCC.begin()->getFunction();
+ // TODO: Skip without norecurse, or interposable?
+ if (F.isDeclaration())
+ continue;
+
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
+ if (Analyzer.runOnFunction(F))
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
-bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
- auto FI = FIM.find(F);
- if (FI == FIM.end())
+char AMDGPUPerfHintAnalysisLegacy::ID = 0;
+char &llvm::AMDGPUPerfHintAnalysisLegacyID = AMDGPUPerfHintAnalysisLegacy::ID;
+
+INITIALIZE_PASS(AMDGPUPerfHintAnalysisLegacy, DEBUG_TYPE,
+ "Analysis if a function is memory bound", true, true)
+
+bool AMDGPUPerfHintAnalysisLegacy::runOnSCC(CallGraphSCC &SCC) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
return false;
- return AMDGPUPerfHint::needLimitWave(FI->second);
+ const GCNTargetMachine &TM = TPC->getTM<GCNTargetMachine>();
+ return Impl.runOnSCC(TM, SCC);
+}
+
+PreservedAnalyses AMDGPUPerfHintAnalysisPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+ bool Changed = Impl->run(TM, CG);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LazyCallGraphAnalysis>();
+ return PA;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 2db8db6957cee..cf2ab82537800 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -12,35 +12,29 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
-#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
-#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+
namespace llvm {
-struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
- static char ID;
+class AMDGPUPerfHintAnalysis;
+class CallGraphSCC;
+class GCNTargetMachine;
+class LazyCallGraph;
+class AMDGPUPerfHintAnalysis {
public:
- AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- bool isMemoryBound(const Function *F) const;
-
- bool needsWaveLimiter(const Function *F) const;
-
struct FuncInfo {
unsigned MemInstCost;
unsigned InstCost;
- unsigned IAMInstCost; // Indirect access memory instruction count
- unsigned LSMInstCost; // Large stride memory instruction count
+ unsigned IAMInstCost; // Indirect access memory instruction count
+ unsigned LSMInstCost; // Large stride memory instruction count
bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively
// high global memory access
FuncInfo()
@@ -48,11 +42,35 @@ struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
HasDenseGlobalMemAcc(false) {}
};
- typedef ValueMap<const Function*, FuncInfo> FuncInfoMap;
+ typedef ValueMap<const Function *, FuncInfo> FuncInfoMap;
private:
-
FuncInfoMap FIM;
+
+public:
+ AMDGPUPerfHintAnalysis() {}
+
+ // OldPM
+ bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
+
+ // NewPM
+ bool run(const GCNTargetMachine &TM, LazyCallGraph &CG);
+
+ bool isMemoryBound(const Function *F) const;
+
+ bool needsWaveLimiter(const Function *F) const;
};
+
+struct AMDGPUPerfHintAnalysisPass
+ : public PassInfoMixin<AMDGPUPerfHintAnalysisPass> {
+ const GCNTargetMachine &TM;
+ std::unique_ptr<AMDGPUPerfHintAnalysis> Impl;
+
+ AMDGPUPerfHintAnalysisPass(const GCNTargetMachine &TM)
+ : TM(TM), Impl(std::make_unique<AMDGPUPerfHintAnalysis>()) {}
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
} // namespace llvm
-#endif // LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2e5d64de9e4c2..09b5988253b64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -21,6 +21,7 @@
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
+#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
@@ -1249,7 +1250,7 @@ bool GCNPassConfig::addPreISel() {
addPass(createLCSSAPass());
if (TM->getOptLevel() > CodeGenOptLevel::Less)
- addPass(&AMDGPUPerfHintAnalysisID);
+ addPass(&AMDGPUPerfHintAnalysisLegacyID);
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll
index 77e0f46a3d457..f4ee4fb82e7a3 100644
--- a/llvm/test/CodeGen/AMDGPU/perfhint.ll
+++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}test_membound:
More information about the llvm-commits
mailing list