[llvm] affed57 - [DFAJumpThreading] Add MaxOuterUseBlocks threshold (#163428)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 29 09:34:52 PDT 2025


Author: Hongyu Chen
Date: 2025-10-30T00:34:48+08:00
New Revision: affed57d3611b114ade0f703d48e5217c8cbf248

URL: https://github.com/llvm/llvm-project/commit/affed57d3611b114ade0f703d48e5217c8cbf248
DIFF: https://github.com/llvm/llvm-project/commit/affed57d3611b114ade0f703d48e5217c8cbf248.diff

LOG: [DFAJumpThreading] Add MaxOuterUseBlocks threshold (#163428)

For every threadable path `B1 -> B2 -> ... -> Bn`, we need to insert phi
nodes into every unduplicated successor of `Bi` if there are outer uses
of duplicated definitions in `B_i`. To prevent the booming of phi nodes,
this patch adds a threshold for the maximum number of unduplicated
successors that may contain outer uses. This threshold makes sense
especially when multi-target branches like switch/indirectbr/callbr are
duplicated.

Note that the O3 statistics in llvm-test-suite are not influenced.

Added: 
    llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll

Modified: 
    llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 66e45ecbde7df..e84ca819b93d8 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
                   cl::desc("Maximum cost accepted for the transformation"),
                   cl::Hidden, cl::init(50));
 
-extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
-} // namespace llvm
-
 static cl::opt<double> MaxClonedRate(
     "dfa-max-cloned-rate",
     cl::desc(
         "Maximum cloned instructions rate accepted for the transformation"),
     cl::Hidden, cl::init(7.5));
 
+static cl::opt<unsigned>
+    MaxOuterUseBlocks("dfa-max-out-use-blocks",
+                      cl::desc("Maximum unduplicated blocks with outer uses "
+                               "accepted for the transformation"),
+                      cl::Hidden, cl::init(40));
+
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 class SelectInstToUnfold {
   SelectInst *SI;
@@ -965,8 +971,16 @@ struct TransformDFA {
     // SLPVectorizer.
     // TODO: Thread the switch partially before reaching the threshold.
     uint64_t NumOrigInst = 0;
-    for (auto *BB : DuplicateMap.keys())
+    uint64_t NumOuterUseBlock = 0;
+    for (auto *BB : DuplicateMap.keys()) {
       NumOrigInst += BB->sizeWithoutDebug();
+      // Only unduplicated blocks with single predecessor require new phi
+      // nodes.
+      for (auto *Succ : successors(BB))
+        if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
+          NumOuterUseBlock++;
+    }
+
     if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
       LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
                            "instructions wll be cloned\n");
@@ -977,6 +991,20 @@ struct TransformDFA {
       return false;
     }
 
+    // Too much unduplicated blocks with outer uses may cause too much
+    // insertions of phi nodes for duplicated definitions. TODO: Drop this
+    // threshold if we come up with another way to reduce the number of inserted
+    // phi nodes.
+    if (NumOuterUseBlock > MaxOuterUseBlocks) {
+      LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+                           "blocks with outer uses\n");
+      ORE->emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+               << "Too much blocks with outer uses.";
+      });
+      return false;
+    }
+
     InstructionCost DuplicationCost = 0;
 
     unsigned JumpTableSize = 0;

diff  --git a/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll b/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll
new file mode 100644
index 0000000000000..dfcc5b1a5c3fe
--- /dev/null
+++ b/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=dfa-jump-threading -dfa-max-out-use-blocks=5 %s | FileCheck %s
+
+declare void @use(i32)
+
+define void @max_outer_uses_by_switch(i32 %cond, ptr %p) {
+; CHECK-LABEL: define void @max_outer_uses_by_switch(
+; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB:.*]]
+; CHECK:       [[SWITCH_BB]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DETERMINE:%.*]], %[[SUB_SWITCH_BB:.*]] ], [ 2, %[[CASE2:.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
+; CHECK-NEXT:      i32 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i32 1, label %[[CASE2]]
+; CHECK-NEXT:      i32 2, label %[[CASE3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB]]
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB]]
+; CHECK:       [[SUB_SWITCH_BB]]:
+; CHECK-NEXT:    [[DETERMINE]] = phi i32 [ 1, %[[CASE1]] ], [ 3, %[[CASE3]] ]
+; CHECK-NEXT:    [[DEF:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1:.*]]
+; CHECK-NEXT:      i32 1, label %[[OUTER2:.*]]
+; CHECK-NEXT:      i32 2, label %[[OUTER3:.*]]
+; CHECK-NEXT:      i32 3, label %[[OUTER4:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB]]
+; CHECK:       [[OUTER1]]:
+; CHECK-NEXT:    call void @use(i32 [[DEF]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER2]]:
+; CHECK-NEXT:    call void @use(i32 [[DEF]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER3]]:
+; CHECK-NEXT:    call void @use(i32 [[DEF]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER4]]:
+; CHECK-NEXT:    call void @use(i32 [[DEF]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[DEFAULT_DEST]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  i32 2, label %case3
+  ]
+
+case1:
+  br label %sub_switch_bb
+
+case3:
+  br label %sub_switch_bb
+
+sub_switch_bb:
+  %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
+  %def = load i32, ptr %p
+  switch i32 %cond, label %switch_bb [
+  i32 0, label %outer1
+  i32 1, label %outer2
+  i32 2, label %outer3
+  i32 3, label %outer4
+  ]
+
+case2:
+  br label %switch_bb
+
+outer1:
+  call void @use(i32 %def)
+  ret void
+
+outer2:
+  call void @use(i32 %def)
+  ret void
+
+outer3:
+  call void @use(i32 %def)
+  ret void
+
+outer4:
+  call void @use(i32 %def)
+  ret void
+
+default_dest:
+  ret void
+}
+
+define void @less_outer_uses_by_switch(i32 %cond, ptr %p) {
+; CHECK-LABEL: define void @less_outer_uses_by_switch(
+; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB:.*]]
+; CHECK:       [[SWITCH_BB]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
+; CHECK-NEXT:      i32 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i32 1, label %[[CASE2:.*]]
+; CHECK-NEXT:      i32 2, label %[[CASE3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SWITCH_BB_JT2:.*]]:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ]
+; CHECK-NEXT:    br label %[[CASE3]]
+; CHECK:       [[SWITCH_BB_JT3:.*]]:
+; CHECK-NEXT:    [[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ]
+; CHECK-NEXT:    br label %[[DEFAULT_DEST]]
+; CHECK:       [[SWITCH_BB_JT1:.*]]:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ]
+; CHECK-NEXT:    br label %[[CASE2]]
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB_JT1]]
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB_JT3]]
+; CHECK:       [[SUB_SWITCH_BB]]:
+; CHECK-NEXT:    [[DEF:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SUB_SWITCH_BB_JT3]]:
+; CHECK-NEXT:    [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ]
+; CHECK-NEXT:    [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SUB_SWITCH_BB_JT1]]:
+; CHECK-NEXT:    [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ]
+; CHECK-NEXT:    [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1]]
+; CHECK-NEXT:    ]
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB_JT2]]
+; CHECK:       [[OUTER1]]:
+; CHECK-NEXT:    [[DEF1:%.*]] = phi i32 [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF]], %[[SUB_SWITCH_BB]] ]
+; CHECK-NEXT:    call void @use(i32 [[DEF1]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[DEFAULT_DEST]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  i32 2, label %case3
+  ]
+
+case1:
+  br label %sub_switch_bb
+
+case3:
+  br label %sub_switch_bb
+
+sub_switch_bb:
+  %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
+  %def = load i32, ptr %p
+  switch i32 %cond, label %switch_bb [
+  i32 0, label %outer1
+  ]
+
+case2:
+  br label %switch_bb
+
+outer1:
+  call void @use(i32 %def)
+  ret void
+
+default_dest:
+  ret void
+}
+
+
+define void @max_outer_uses_multi_preds(i32 %cond, ptr %p) {
+; CHECK-LABEL: define void @max_outer_uses_multi_preds(
+; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB:.*]]
+; CHECK:       [[SWITCH_BB]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
+; CHECK-NEXT:      i32 0, label %[[CASE1:.*]]
+; CHECK-NEXT:      i32 1, label %[[CASE2:.*]]
+; CHECK-NEXT:      i32 2, label %[[CASE3:.*]]
+; CHECK-NEXT:      i32 3, label %[[CASE4:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SWITCH_BB_JT2:.*]]:
+; CHECK-NEXT:    [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ]
+; CHECK-NEXT:    br label %[[CASE3]]
+; CHECK:       [[SWITCH_BB_JT3:.*]]:
+; CHECK-NEXT:    [[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ]
+; CHECK-NEXT:    br label %[[CASE4]]
+; CHECK:       [[SWITCH_BB_JT1:.*]]:
+; CHECK-NEXT:    [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ]
+; CHECK-NEXT:    br label %[[CASE2]]
+; CHECK:       [[CASE1]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB_JT1]]
+; CHECK:       [[CASE3]]:
+; CHECK-NEXT:    br label %[[SUB_SWITCH_BB_JT3]]
+; CHECK:       [[SUB_SWITCH_BB]]:
+; CHECK-NEXT:    [[DEF:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1:.*]]
+; CHECK-NEXT:      i32 1, label %[[OUTER2:.*]]
+; CHECK-NEXT:      i32 2, label %[[OUTER3:.*]]
+; CHECK-NEXT:      i32 3, label %[[OUTER4:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SUB_SWITCH_BB_JT3]]:
+; CHECK-NEXT:    [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ]
+; CHECK-NEXT:    [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1]]
+; CHECK-NEXT:      i32 1, label %[[OUTER2]]
+; CHECK-NEXT:      i32 2, label %[[OUTER3]]
+; CHECK-NEXT:      i32 3, label %[[OUTER4]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SUB_SWITCH_BB_JT1]]:
+; CHECK-NEXT:    [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ]
+; CHECK-NEXT:    [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1]]
+; CHECK-NEXT:      i32 1, label %[[OUTER2]]
+; CHECK-NEXT:      i32 2, label %[[OUTER3]]
+; CHECK-NEXT:      i32 3, label %[[OUTER4]]
+; CHECK-NEXT:    ]
+; CHECK:       [[CASE4]]:
+; CHECK-NEXT:    [[DEF1:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    switch i32 [[COND]], label %[[OUTER4]] [
+; CHECK-NEXT:      i32 0, label %[[OUTER1]]
+; CHECK-NEXT:      i32 1, label %[[OUTER2]]
+; CHECK-NEXT:      i32 2, label %[[OUTER3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[CASE2]]:
+; CHECK-NEXT:    br label %[[SWITCH_BB_JT2]]
+; CHECK:       [[OUTER1]]:
+; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
+; CHECK-NEXT:    call void @use(i32 [[PHI1]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER2]]:
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
+; CHECK-NEXT:    call void @use(i32 [[PHI2]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER3]]:
+; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
+; CHECK-NEXT:    call void @use(i32 [[PHI3]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[OUTER4]]:
+; CHECK-NEXT:    [[PHI4:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
+; CHECK-NEXT:    call void @use(i32 [[PHI4]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[DEFAULT_DEST]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %switch_bb
+
+switch_bb:
+  %phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
+  switch i32 %phi, label %default_dest [
+  i32 0, label %case1
+  i32 1, label %case2
+  i32 2, label %case3
+  i32 3, label %case4
+  ]
+
+case1:
+  br label %sub_switch_bb
+
+case3:
+  br label %sub_switch_bb
+
+sub_switch_bb:
+  %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
+  %def = load i32, ptr %p
+  switch i32 %cond, label %switch_bb [
+  i32 0, label %outer1
+  i32 1, label %outer2
+  i32 2, label %outer3
+  i32 3, label %outer4
+  ]
+
+case4:
+  %def1 = load i32, ptr %p
+  switch i32 %cond, label %outer4 [
+  i32 0, label %outer1
+  i32 1, label %outer2
+  i32 2, label %outer3
+  ]
+
+case2:
+  br label %switch_bb
+
+outer1:
+  %phi1 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
+  call void @use(i32 %phi1)
+  ret void
+
+outer2:
+  %phi2 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
+  call void @use(i32 %phi2)
+  ret void
+
+outer3:
+  %phi3 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
+  call void @use(i32 %phi3)
+  ret void
+
+outer4:
+  %phi4 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
+  call void @use(i32 %phi4)
+  ret void
+
+default_dest:
+  ret void
+}


        


More information about the llvm-commits mailing list