[llvm] bcdc047 - speeding up ext-tsp for huge instances
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 2 07:23:05 PST 2022
Author: spupyrev
Date: 2022-03-02T07:17:48-08:00
New Revision: bcdc0477319a26fd8dcdde5ace3bdd6743599f44
URL: https://github.com/llvm/llvm-project/commit/bcdc0477319a26fd8dcdde5ace3bdd6743599f44
DIFF: https://github.com/llvm/llvm-project/commit/bcdc0477319a26fd8dcdde5ace3bdd6743599f44.diff
LOG: speeding up ext-tsp for huge instances
Differential Revision: https://reviews.llvm.org/D120780
Added:
Modified:
llvm/lib/CodeGen/MachineBlockPlacement.cpp
llvm/lib/Transforms/Utils/CodeLayout.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 5ad3d4d21ce2e..e4534ddad1032 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -201,6 +201,7 @@ static cl::opt<unsigned> TriangleChainCount(
cl::Hidden);
extern cl::opt<bool> EnableExtTspBlockPlacement;
+extern cl::opt<bool> ApplyExtTspWithoutProfile;
namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
@@ -3419,7 +3420,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
// Apply a post-processing optimizing block placement.
- if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
+ (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
// Find a new placement and modify the layout of the blocks in the function.
applyExtTsp();
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 43225bc2f534a..a4ded18c6f0ca 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -49,6 +49,11 @@ cl::opt<bool> EnableExtTspBlockPlacement(
cl::desc("Enable machine block placement based on the ext-tsp model, "
"optimizing I-cache utilization."));
+cl::opt<bool> ApplyExtTspWithoutProfile(
+ "ext-tsp-apply-without-profile",
+ cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
+ cl::init(true), cl::Hidden, cl::ZeroOrMore);
+
// Algorithm-specific constants. The values are tuned for the best performance
// of large-scale front-end bound binaries.
static cl::opt<double>
@@ -67,6 +72,12 @@ static cl::opt<unsigned> BackwardDistance(
"ext-tsp-backward-distance", cl::Hidden, cl::init(640),
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
+// The maximum size of a chain created by the algorithm. The size is bounded
+// so that the algorithm can efficiently process extremely large instances.
+static cl::opt<unsigned>
+ MaxChainSize("ext-tsp-max-chain-size", cl::Hidden, cl::init(4096),
+ cl::desc("The maximum size of a chain to create."));
+
// The maximum size of a chain for splitting. Larger values of the threshold
// may yield better quality at the cost of worsen run-time.
static cl::opt<unsigned> ChainSplitThreshold(
@@ -226,6 +237,8 @@ class Chain {
const std::vector<Block *> &blocks() const { return Blocks; }
+ size_t numBlocks() const { return Blocks.size(); }
+
const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
return Edges;
}
@@ -502,7 +515,7 @@ class ExtTSPImpl {
AllEdges.reserve(AllJumps.size());
for (auto &Block : AllBlocks) {
for (auto &Jump : Block.OutJumps) {
- const auto SuccBlock = Jump->Target;
+ auto SuccBlock = Jump->Target;
auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
// this edge is already present in the graph
if (CurEdge != nullptr) {
@@ -592,6 +605,10 @@ class ExtTSPImpl {
if (ChainPred == ChainSucc)
continue;
+ // Stop early if the combined chain violates the maximum allowed size
+ if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
+ continue;
+
// Compute the gain of merging the two chains
auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
if (CurGain.score() <= EPS)
More information about the llvm-commits
mailing list