[llvm] [DFAJumpThreading] Add MaxOuterUseBlocks threshold (PR #163428)
Hongyu Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 10:47:22 PDT 2025
https://github.com/XChy created https://github.com/llvm/llvm-project/pull/163428
For every threadable path `B1 -> B2 -> ... -> Bn`, we need to insert phi nodes into every unduplicated successor of `Bi` if there are outer uses of duplicated definitions in `Bi`. To prevent an explosion in the number of phi nodes, this patch adds a threshold for the maximum number of unduplicated successors that may contain outer uses. This threshold is especially useful when multi-target branches such as switch/indirectbr/callbr are duplicated.
Note that the O3 statistics in llvm-test-suite are not affected.
>From 8704e9a50ad3beb894e3119d1bf0926996bf080c Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 15 Oct 2025 01:30:40 +0800
Subject: [PATCH] [DFAJumpThreading] Add MaxOuterUseBlocks threshold
---
.../Transforms/Scalar/DFAJumpThreading.cpp | 38 ++++++++++++++++---
1 file changed, 33 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ff5f390d6fe18..11d65d5a59175 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
-extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
-} // namespace llvm
-
static cl::opt<double> MaxClonedRate(
"dfa-max-cloned-rate",
cl::desc(
"Maximum cloned instructions rate accepted for the transformation"),
cl::Hidden, cl::init(7.5));
+static cl::opt<unsigned>
+ MaxOuterUseBlocks("dfa-max-out-use-blocks",
+ cl::desc("Maximum unduplicated blocks with outer uses "
+ "accepted for the transformation"),
+ cl::Hidden, cl::init(40));
+
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class SelectInstToUnfold {
SelectInst *SI;
@@ -966,8 +972,16 @@ struct TransformDFA {
// SLPVectorizer.
// TODO: Thread the switch partially before reaching the threshold.
uint64_t NumOrigInst = 0;
- for (auto *BB : DuplicateMap.keys())
+ uint64_t NumOuterUseBlock = 0;
+ for (auto *BB : DuplicateMap.keys()) {
NumOrigInst += BB->sizeWithoutDebug();
+ // Only unduplicated blocks with single predecessor require new phi
+ // nodes.
+ for (auto *Succ : successors(BB))
+ if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
+ NumOuterUseBlock++;
+ }
+
if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
"instructions wll be cloned\n");
@@ -978,6 +992,20 @@ struct TransformDFA {
return false;
}
+ // Too much unduplicated blocks with outer uses may cause too much
+ // insertions of phi nodes for duplicated definitions. TODO: Drop this
+ // threshold if we come up with another way to reduce the number of inserted
+ // phi nodes.
+ if (NumOuterUseBlock > MaxOuterUseBlocks) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+ "blocks with outer uses\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+ << "Too much blocks with outer uses.";
+ });
+ return false;
+ }
+
InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
More information about the llvm-commits
mailing list