[llvm] [SLU][profcheck] Estimate branch weights in partial unswitch cases (PR #164035)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 17 18:03:17 PDT 2025


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/164035

>From 4fc6b44499e3b424eabde2a91de637724b0ed6be Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 17 Oct 2025 16:43:02 -0700
Subject: [PATCH] [SLU][profcheck] Estimate branch weights in partial unswitch
 cases

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  52 +++++-
 .../nontrivial-unswitch-profile.ll            |  89 ++++++++++
 .../simple-unswitch-profile.ll                | 157 ++++++++++++++++++
 3 files changed, 290 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
 create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d1bce16..3b38b830a8e3a 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -82,6 +82,7 @@ STATISTIC(
 STATISTIC(NumInvariantConditionsInjected,
           "Number of invariant conditions injected and unswitched");
 
+namespace llvm {
 static cl::opt<bool> EnableNonTrivialUnswitch(
     "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
     cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -132,11 +133,17 @@ static cl::opt<bool> InjectInvariantConditions(
 
 static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
     "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
-    cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
-                         "unswitch on them to eliminate branches that are "
-                         "not-taken 1/<this option> times or less."),
+    cl::Hidden,
+    cl::desc("Only try to inject loop invariant conditions and "
+             "unswitch on them to eliminate branches that are "
+             "not-taken 1/<this option> times or less."),
     cl::init(16));
 
+static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
+                                     cl::Hidden, cl::init(true));
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
 namespace {
 struct CompareDesc {
@@ -272,10 +279,33 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
 /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
 /// end of \p BB and conditionally branch on the copied condition. We only
 /// branch on a single value.
+/// We attempt to estimate the profile of the resulting conditional branch from
+/// \p ComputeProfFrom, which is the original conditional branch we're
+/// unswitching.
 static void buildPartialUnswitchConditionalBranch(
     BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
     BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
-    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
+    const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
+    const BranchInst &ComputeProfFrom) {
+
+  SmallVector<uint32_t> BranchWeights;
+  bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
+                          extractBranchWeights(ComputeProfFrom, BranchWeights);
+  // If Direction is true, that means we had a disjunction and that the "true"
+  // case exits. The probability of the disjunction of the subset of terms is at
+  // most as high as the original one. So, if the probability is higher than the
+  // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
+  // but if it's lower, we will use the original probability.
+  // Coversely, if Direction is false, that means we had a conjunction, and the
+  // probability of exiting is captured in the second branch weight. That
+  // probability is a disjunction (of the negation of the original terms). The
+  // same reasoning applies as above.
+  if (HasBranchWeights &&
+      static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
+              static_cast<double>(sum_of(BranchWeights)) >
+          0.5)
+    HasBranchWeights = false;
+
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
 
@@ -288,8 +318,14 @@ static void buildPartialUnswitchConditionalBranch(
 
   Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
                           : IRB.CreateAnd(FrozenInvariants);
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-                   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *BR = IRB.CreateCondBr(
+      Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+      Direction ? &NormalSucc : &UnswitchedSucc,
+      HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
+                       : nullptr);
+  if (!HasBranchWeights)
+    setExplicitlyUnknownBranchWeightsIfProfiled(
+        *BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Copy a set of loop invariant values, and conditionally branch on them.
@@ -659,7 +695,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
              " condition!");
     buildPartialUnswitchConditionalBranch(
         *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
-        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
+        FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
   }
 
   // Update the dominator tree with the added edge.
@@ -2478,7 +2514,7 @@ static void unswitchNontrivialInvariants(
     else {
       buildPartialUnswitchConditionalBranch(
           *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
-          FreezeLoopUnswitchCond, BI, &AC, DT);
+          FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
     }
     DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
 
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
new file mode 100644
index 0000000000000..9cc417f6b874e
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll
@@ -0,0 +1,89 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof
+
+;--- main.ll
+declare i32 @a()
+declare i32 @b()
+
+define i32 @or(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %v1 = load i1, ptr %ptr
+  %cond_or = or i1 %v1, %cond
+  br i1 %cond_or, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+  call i32 @a()
+  br label %latch
+
+loop_b:
+  call i32 @b()
+  br label %latch
+
+latch:
+  %v2 = load i1, ptr %ptr
+  br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+  ret i32 0
+}
+
+define i32 @and(ptr %ptr, i1 %cond) !prof !0 {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %v1 = load i1, ptr %ptr
+  %cond_and = and i1 %v1, %cond
+  br i1 %cond_and, label %loop_a, label %loop_b, !prof !1
+
+loop_a:
+  call i32 @a()
+  br label %latch
+
+loop_b:
+  call i32 @b()
+  br label %latch
+
+latch:
+  %v2 = load i1, ptr %ptr
+  br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
+
+loop_exit:
+  ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond.fr = freeze i1 %cond
+; CHECK-NEXT:   br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond.fr = freeze i1 %cond
+; CHECK-NEXT:   br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"}
+
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+!2 = !{!"branch_weights", i32 5, i32 7}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond.fr = freeze i1 %cond
+; CHECK-NEXT:   br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond.fr = freeze i1 %cond
+; CHECK-NEXT:   br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
new file mode 100644
index 0000000000000..ec6baa5b3772f
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
@@ -0,0 +1,157 @@
+; RUN: split-file %s %t
+; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
+; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
+;
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN:   %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN:   %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN:   %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF
+
+; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
+; RUN:   %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
+
+;--- main.ll
+declare void @some_func() noreturn
+
+define i32 @or(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+  %cond2 = icmp eq i32 %var3, 10
+  %cond.or = or i1 %cond1, %cond2
+  br i1 %cond.or, label %loop_exit, label %do_something, !prof !1
+
+do_something:
+  %var2 = add i32 %var3, 1
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+define i32 @and(i1 %cond1, i32 %var1) !prof !0 {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+  %cond2 = icmp eq i32 %var3, 10
+  %cond.and = and i1 %cond1, %cond2
+  br i1 %cond.and, label %do_something, label %loop_exit, !prof !1
+
+do_something:
+  %var2 = add i32 %var3, 1
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+;--- probable-or.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT:   br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT:   br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
+; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"}
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+;--- probable-and.prof
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 1000, i32 1}
+; CHECK-LABEL: @or
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT:   br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
+; CHECK-LABEL: @and
+; CHECK-LABEL: entry:
+; CHECK-NEXT:   %cond1.fr = freeze i1 %cond1
+; CHECK-NEXT:   br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
+; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
+; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1}
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}}, count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}}, count = 10
+ ; PROFILE-COM: - do_something: {{.*}}, count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}}, count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 10010
+ ; PROFILE-COM: - do_something: {{.*}} count = 10000
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'or':
+; PROFILE-COM: block-frequency-info: or
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-COM: - entry.split: {{.*}} count = 5
+ ; PROFILE-COM: - loop_begin: {{.*}} count = 5
+ ; PROFILE-COM: - do_something: {{.*}} count = 0
+ ; PROFILE-COM: - loop_exit: {{.*}} count = 5
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
+
+; PROFILE-COM: Printing analysis results of BFI for function 'and':
+; PROFILE-COM: block-frequency-info: and
+ ; PROFILE-COM: - entry: {{.*}} count = 10
+ ; PROFILE-REF: - entry.split: {{.*}} count = 5
+ ; PROFILE-CHK: - entry.split: {{.*}} count = 10
+ ; PROFILE-REF: - loop_begin: {{.*}} count = 5005
+ ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
+ ; PROFILE-REF: - do_something: {{.*}} count = 5000
+ ; PROFILE-CHK: - do_something: {{.*}} count = 9990
+ ; PROFILE-REF: - loop_exit: {{.*}} count = 5
+ ; PROFILE-CHK: - loop_exit: {{.*}} count = 10
+ ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10



More information about the llvm-commits mailing list