[llvm] 6197205 - [CodeGen] Limit number of analyzed predecessors

Fri Jun 20 02:23:03 PDT 2025

Author: Alexis Engelke
Date: 2025-06-20T11:23:00+02:00
New Revision: 61972054f3fcaf59096799342bac9c93dd9aa432

URL: https://github.com/llvm/llvm-project/commit/61972054f3fcaf59096799342bac9c93dd9aa432
DIFF: https://github.com/llvm/llvm-project/commit/61972054f3fcaf59096799342bac9c93dd9aa432.diff

LOG: [CodeGen] Limit number of analyzed predecessors

MachineBlockPlacement has quadratic runtime in the number of
predecessors: in some situation, for an edge, all predecessors of the
successor are considered.

Limit the number of considered predecessors to bound compile time for
large functions.

Pull Request: https://github.com/llvm/llvm-project/pull/142584

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineBlockPlacement.cpp
    llvm/test/CodeGen/RISCV/branch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 08fe3d47e2ff5..2dbabfe345d5e 100644

--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -104,6 +104,12 @@ static cl::opt<unsigned> MaxBytesForAlignmentOverride(
              "alignment"),
     cl::init(0), cl::Hidden);
 
+static cl::opt<unsigned> PredecessorLimit(
+    "block-placement-predecessor-limit",
+    cl::desc("For blocks with more predecessors, certain layout optimizations"
+             "will be disabled to prevent quadratic compile time."),
+    cl::init(1000), cl::Hidden);
+
 // FIXME: Find a good default for this flag and remove the flag.
 static cl::opt<unsigned> ExitBlockBias(
     "block-placement-exit-block-bias",
@@ -1030,6 +1036,11 @@ bool MachineBlockPlacement::isTrellis(
   SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
 
   for (MachineBasicBlock *Succ : ViableSuccs) {
+    // Compile-time optimization: runtime is quadratic in the number of
+    // predecessors. For such uncommon cases, exit early.
+    if (Succ->pred_size() > PredecessorLimit)
+      return false;
+
     int PredCount = 0;
     for (auto *SuccPred : Succ->predecessors()) {
       // Allow triangle successors, but don't count them.
@@ -1472,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
   if (SuccChain.UnscheduledPredecessors == 0)
     return false;
 
+  // Compile-time optimization: runtime is quadratic in the number of
+  // predecessors. For such uncommon cases, exit early.
+  if (Succ->pred_size() > PredecessorLimit)
+    return false;
+
   // There are two basic scenarios here:
   // -------------------------------------
   // Case 1: triangular shape CFG (if-then):

diff  --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll
index 578080cd3a240..ed86ca8ca4dd1 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -block-placement-predecessor-limit=10 < %s \
+; RUN:   | FileCheck -check-prefix=RV32I-MBPLIMIT %s
 
 define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
 ; RV32I-LABEL: foo:
@@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind {
 ; RV32I-NEXT:    lw zero, 0(a1)
 ; RV32I-NEXT:  .LBB0_14: # %end
 ; RV32I-NEXT:    ret
+;
+; RV32I-MBPLIMIT-LABEL: foo:
+; RV32I-MBPLIMIT:       # %bb.0:
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bne a3, a0, .LBB0_2
+; RV32I-MBPLIMIT-NEXT:  .LBB0_1: # %end
+; RV32I-MBPLIMIT-NEXT:    ret
+; RV32I-MBPLIMIT-NEXT:  .LBB0_2: # %test2
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bne a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.3: # %test3
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    blt a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.4: # %test4
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bge a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.5: # %test5
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bltu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.6: # %test6
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bgeu a3, a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.7: # %test7
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    blt a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.8: # %test8
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bge a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.9: # %test9
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bltu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.10: # %test10
+; RV32I-MBPLIMIT-NEXT:    lw a3, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bgeu a0, a3, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.11: # %test11
+; RV32I-MBPLIMIT-NEXT:    lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    andi a2, a2, 1
+; RV32I-MBPLIMIT-NEXT:    bnez a2, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.12: # %test12
+; RV32I-MBPLIMIT-NEXT:    lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    bgez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.13: # %test13
+; RV32I-MBPLIMIT-NEXT:    lw a0, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    blez a0, .LBB0_1
+; RV32I-MBPLIMIT-NEXT:  # %bb.14: # %test14
+; RV32I-MBPLIMIT-NEXT:    lw zero, 0(a1)
+; RV32I-MBPLIMIT-NEXT:    ret
   %val1 = load volatile i32, ptr %b
   %tst1 = icmp eq i32 %val1, %a
   br i1 %tst1, label %end, label %test2