[llvm] [DFAJumpThreading] Constraint the number of cloned instructions (PR #161632)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 23:18:12 PDT 2025
https://github.com/XChy created https://github.com/llvm/llvm-project/pull/161632
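Duplicating the blocks of threaded paths can significantly increase IR size and, as a result, slow down later optimizations at compile time. This patch adds a coarse constraint on the number of duplicated instructions: the transformation is rejected when the ratio of cloned instructions to original instructions exceeds a threshold (the hidden option `-dfa-max-cloned-rate`, default 7.5).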
New compile-time number: https://llvm-compile-time-tracker.com/compare.php?from=6935a3362628e823451baf86d15694d57f9664ed&to=455bd99ac151dd0445737f6e02210c2a4ec2dac5&stat=instructions%3Au
>From 1010fc02a4d8cf17abde4651e6fd68f28861d17d Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Thu, 2 Oct 2025 14:13:45 +0800
Subject: [PATCH] [DFAJumpThreading] Constraint the number of cloned
instructions
---
.../Transforms/Scalar/DFAJumpThreading.cpp | 26 ++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e9a3e983bc1e2..3bd7f8e2fcf93 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -120,6 +120,12 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
+static cl::opt<double> MaxClonedRate(
+ "dfa-max-cloned-rate",
+ cl::desc(
+ "Maximum cloned instructions rate accepted for the transformation"),
+ cl::Hidden, cl::init(7.5));
+
namespace {
class SelectInstToUnfold {
@@ -828,6 +834,7 @@ struct TransformDFA {
/// also returns false if it is illegal to clone some required block.
bool isLegalAndProfitableToTransform() {
CodeMetrics Metrics;
+ uint64_t NumClonedInst = 0;
SwitchInst *Switch = SwitchPaths->getSwitchInst();
// Don't thread switch without multiple successors.
@@ -837,7 +844,6 @@ struct TransformDFA {
// Note that DuplicateBlockMap is not being used as intended here. It is
// just being used to ensure (BB, State) pairs are only counted once.
DuplicateBlockMap DuplicateMap;
-
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
PathType PathBBs = TPath.getPath();
APInt NextState = TPath.getExitValue();
@@ -848,6 +854,7 @@ struct TransformDFA {
BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
if (!VisitedBB) {
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += range_size(*BB);
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -865,6 +872,7 @@ struct TransformDFA {
if (VisitedBB)
continue;
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += range_size(*BB);
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -901,6 +909,22 @@ struct TransformDFA {
}
}
+ // Too much cloned instructions slow down later optimizations, especially
+ // SLPVectorizer.
+ // TODO: Thread the switch partially before reaching the threshold.
+ uint64_t NumOrigInst = 0;
+ for (auto &[BB, _] : DuplicateMap)
+ NumOrigInst += range_size(*BB);
+ if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+ "instructions wll be cloned\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+ << "Too much instructions will be cloned.";
+ });
+ return false;
+ }
+
InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
More information about the llvm-commits
mailing list