[llvm] [SimpleLoopUnswitch] Adjust cost multiplier accounting for parent loop size (PR #155379)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 26 02:02:21 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Antonio Frighetto (antoniofrighetto)

<details>
<summary>Changes</summary>

When estimating the cost to avoid exponential unswitches of non-trivial invariant conditions, also consider the parent loop basic blocks size, ensuring this does not grow unexpectedly.

Alternative solution might involve adding a threshold on the number of times LoopRotate is allowed to rotate the same loop across invocations (https://github.com/llvm/llvm-project/issues/138509#issuecomment-3193855772), though I think this would require a per-loop count to be serialized/deserialized in a metadata each time, which seems far more invasive.

Fixes: https://github.com/llvm/llvm-project/issues/138509.

---
Full diff: https://github.com/llvm/llvm-project/pull/155379.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp (+22-5) 
- (added) llvm/test/Transforms/SimpleLoopUnswitch/pr138509.ll (+54) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 9b40fc03da6bb..e4ba70d1bce16 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -98,6 +98,9 @@ static cl::opt<bool> EnableUnswitchCostMultiplier(
 static cl::opt<int> UnswitchSiblingsToplevelDiv(
     "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
     cl::desc("Toplevel siblings divisor for cost multiplier."));
+static cl::opt<int> UnswitchParentBlocksDiv(
+    "unswitch-parent-blocks-div", cl::init(8), cl::Hidden,
+    cl::desc("Outer loop size divisor for cost multiplier."));
 static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
     "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
     cl::desc("Number of unswitch candidates that are ignored when calculating "
@@ -2809,9 +2812,9 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
 }
 
 /// Cost multiplier is a way to limit potentially exponential behavior
-/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
-/// candidates available. Also accounting for the number of "sibling" loops with
-/// the idea to account for previous unswitches that already happened on this
+/// of loop-unswitch. Cost is multiplied in proportion of 2^number of unswitch
+/// candidates available. Also consider the number of "sibling" loops with
+/// the idea of accounting for previous unswitches that already happened on this
 /// cluster of loops. There was an attempt to keep this formula simple,
 /// just enough to limit the worst case behavior. Even if it is not that simple
 /// now it is still not an attempt to provide a detailed heuristic size
@@ -2842,7 +2845,19 @@ static int CalculateUnswitchCostMultiplier(
     return 1;
   }
 
+  // Each invariant non-trivial condition, after being unswitched, is supposed
+  // to have its own specialized sibling loop (the invariant condition has been
+  // hoisted out of the child loop into a newly-cloned loop). When unswitching
+  // conditions in nested loops, the basic block size of the outer loop should
+  // not be altered. If such a size significantly increases across unswitching
+  // invocations, something may be wrong; so adjust the final cost taking this
+  // into account.
   auto *ParentL = L.getParentLoop();
+  int ParentLoopSizeMultiplier = 1;
+  if (ParentL)
+    ParentLoopSizeMultiplier =
+        std::max<int>(ParentL->getNumBlocks() / UnswitchParentBlocksDiv, 1);
+
   int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
                                : std::distance(LI.begin(), LI.end()));
   // Count amount of clones that all the candidates might cause during
@@ -2887,14 +2902,16 @@ static int CalculateUnswitchCostMultiplier(
   // at an upper bound.
   int CostMultiplier;
   if (ClonesPower > Log2_32(UnswitchThreshold) ||
-      SiblingsMultiplier > UnswitchThreshold)
+      SiblingsMultiplier > UnswitchThreshold ||
+      ParentLoopSizeMultiplier > UnswitchThreshold)
     CostMultiplier = UnswitchThreshold;
   else
     CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
                               (int)UnswitchThreshold);
 
   LLVM_DEBUG(dbgs() << "  Computed multiplier  " << CostMultiplier
-                    << " (siblings " << SiblingsMultiplier << " * clones "
+                    << " (siblings " << SiblingsMultiplier << " * parent size "
+                    << ParentLoopSizeMultiplier << " * clones "
                     << (1 << ClonesPower) << ")"
                     << " for unswitch candidate: " << TI << "\n");
   return CostMultiplier;
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr138509.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr138509.ll
new file mode 100644
index 0000000000000..2ce0cb5c3f299
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr138509.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -enable-unswitch-cost-multiplier=true -unswitch-parent-blocks-div=1 \
+; RUN:     -passes="loop-mssa(loop-simplifycfg,licm,loop-rotate,simple-loop-unswitch<nontrivial>),print<loops>" \
+; RUN:     -disable-output 2>&1 | sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-DIV-1
+
+; RUN: opt < %s -S -enable-unswitch-cost-multiplier=true -unswitch-parent-blocks-div=2 \
+; RUN:     -passes="loop-mssa(loop-simplifycfg,licm,loop-rotate,simple-loop-unswitch<nontrivial>),print<loops>" \
+; RUN:     -disable-output 2>&1 | sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-DIV-2
+
+; LOOP-DIV-1-COUNT-6: Loop at depth 1 containing:
+; LOOP-DIV-2-COUNT-12: Loop at depth 1 containing:
+
+ at a = global i32 0, align 4
+ at b = global i32 0, align 4
+ at c = global i32 0, align 4
+ at d = global i32 0, align 4
+
+define i32 @main() {
+entry:
+  br label %outer.loop.header
+
+outer.loop.header:                                ; preds = %outer.loop.latch, %entry
+  br i1 false, label %exit, label %outer.loop.body
+
+outer.loop.body:                                  ; preds = %inner.loop.header, %outer.loop.header
+  store i32 1, ptr @c, align 4
+  %cmp = icmp sgt i32 0, -1
+  br i1 %cmp, label %outer.loop.latch, label %exit
+
+inner.loop.header:                                ; preds = %outer.loop.latch, %inner.loop.body
+  %a_val = load i32, ptr @a, align 4
+  %c_val = load i32, ptr @c, align 4
+  %mul = mul nsw i32 %c_val, %a_val
+  store i32 %mul, ptr @b, align 4
+  %cmp2 = icmp sgt i32 %mul, -1
+  br i1 %cmp2, label %inner.loop.body, label %outer.loop.body
+
+inner.loop.body:                                  ; preds = %inner.loop.header
+  %mul2 = mul nsw i32 %c_val, 3
+  store i32 %mul2, ptr @c, align 4
+  store i32 %c_val, ptr @d, align 4
+  %mul3 = mul nsw i32 %c_val, %a_val
+  %cmp3 = icmp sgt i32 %mul3, -1
+  br i1 %cmp3, label %inner.loop.header, label %exit
+
+outer.loop.latch:                                 ; preds = %outer.loop.body
+  %d_val = load i32, ptr @d, align 4
+  store i32 %d_val, ptr @b, align 4
+  %cmp4 = icmp eq i32 %d_val, 0
+  br i1 %cmp4, label %inner.loop.header, label %outer.loop.header
+
+exit:                                             ; preds = %inner.loop.body, %outer.loop.body, %outer.loop.header
+  ret i32 0
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/155379


More information about the llvm-commits mailing list