[llvm] [DFAJumpThreading] Constrain the number of cloned instructions (PR #161632)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 23:18:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Hongyu Chen (XChy)
<details>
<summary>Changes</summary>
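Duplicating the blocks of threaded paths can significantly increase IR size and slow down later optimizations, which increases compile time. This patch adds a coarse limit on the ratio of cloned instructions to original instructions (`-dfa-max-cloned-rate`, default 7.5); when the ratio is exceeded, the switch is not threaded.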
New compile-time number: https://llvm-compile-time-tracker.com/compare.php?from=6935a3362628e823451baf86d15694d57f9664ed&to=455bd99ac151dd0445737f6e02210c2a4ec2dac5&stat=instructions%3Au
---
Full diff: https://github.com/llvm/llvm-project/pull/161632.diff
1 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp (+25-1)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e9a3e983bc1e2..3bd7f8e2fcf93 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -120,6 +120,12 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
+static cl::opt<double> MaxClonedRate(
+ "dfa-max-cloned-rate",
+ cl::desc(
+ "Maximum cloned instructions rate accepted for the transformation"),
+ cl::Hidden, cl::init(7.5));
+
namespace {
class SelectInstToUnfold {
@@ -828,6 +834,7 @@ struct TransformDFA {
/// also returns false if it is illegal to clone some required block.
bool isLegalAndProfitableToTransform() {
CodeMetrics Metrics;
+ uint64_t NumClonedInst = 0;
SwitchInst *Switch = SwitchPaths->getSwitchInst();
// Don't thread switch without multiple successors.
@@ -837,7 +844,6 @@ struct TransformDFA {
// Note that DuplicateBlockMap is not being used as intended here. It is
// just being used to ensure (BB, State) pairs are only counted once.
DuplicateBlockMap DuplicateMap;
-
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
PathType PathBBs = TPath.getPath();
APInt NextState = TPath.getExitValue();
@@ -848,6 +854,7 @@ struct TransformDFA {
BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
if (!VisitedBB) {
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += range_size(*BB);
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -865,6 +872,7 @@ struct TransformDFA {
if (VisitedBB)
continue;
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += range_size(*BB);
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -901,6 +909,22 @@ struct TransformDFA {
}
}
+ // Too much cloned instructions slow down later optimizations, especially
+ // SLPVectorizer.
+ // TODO: Thread the switch partially before reaching the threshold.
+ uint64_t NumOrigInst = 0;
+ for (auto &[BB, _] : DuplicateMap)
+ NumOrigInst += range_size(*BB);
+ if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+ "instructions wll be cloned\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+ << "Too much instructions will be cloned.";
+ });
+ return false;
+ }
+
InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
``````````
</details>
https://github.com/llvm/llvm-project/pull/161632
More information about the llvm-commits mailing list