[llvm] [IRCE] Relax profitability check (PR #104659)

Jan Ječmen via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 07:57:52 PST 2024


https://github.com/JanJecmen updated https://github.com/llvm/llvm-project/pull/104659

>From b172bcfb8c7c018b2201be26582b7947945b9b1a Mon Sep 17 00:00:00 2001
From: Jan Jecmen <jjecmen at azul.com>
Date: Fri, 15 Nov 2024 13:21:20 +0000
Subject: [PATCH] [IRCE] Relax profitability check

IRCE currently has two profitability checks:
  1. min number of iterations (10 by default)
  2. branch is highly biased (> 15/16)
However, it may still be profitable to eliminate range checks even
if the branch isn't as biased. Consider, for example, a loop with
100 iterations, where IRCE currently eliminates all 100 range checks.
The same range checks, if performed in a loop with 200 iterations,
are not eliminated because their branch is now only 1:1.

This patch proposes to relax the profitability checks of IRCE.
Namely, instead of the two checks currently in place, consider IRCE
profitable if the branch probability scaled by the expected number
of iterations (i.e., the estimated number of eliminated checks)
is over a threshold. This covers the minimum number of iterations check
(there are at least as many iterations as eliminated range checks),
and changes the bias check from a percent of iterations to at least
a constant threshold of eliminated checks.
The effect is shown in the new test `profitability.ll`. The loop has
100 iterations (the backedge is taken 99:1). The range
check's branch weights are 1:1, so current IRCE wouldn't even
consider this a range check. However, with the new implementation,
setting the minimum eliminated checks as high as 50, the
transformation is still applied.

If the number of iterations can't be estimated, the check falls back
to the current 15/16 likelihood check.
---
 .../Scalar/InductiveRangeCheckElimination.cpp | 100 ++++++++++--------
 llvm/test/Transforms/IRCE/low-iterations.ll   |   4 +-
 llvm/test/Transforms/IRCE/profitability.ll    |  38 +++++++
 3 files changed, 98 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/Transforms/IRCE/profitability.ll

diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 0bc783412595e5..e706a6f83b1e75 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -105,8 +105,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
 static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
                                              cl::Hidden, cl::init(false));
 
-static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
-                                              cl::Hidden, cl::init(10));
+static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
+                                             cl::Hidden, cl::init(10));
 
 static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                  cl::Hidden, cl::init(true));
@@ -130,15 +130,9 @@ static cl::opt<bool>
 
 namespace {
 
-/// An inductive range check is conditional branch in a loop with
-///
-///  1. a very cold successor (i.e. the branch jumps to that successor very
-///     rarely)
-///
-///  and
-///
-///  2. a condition that is provably true for some contiguous range of values
-///     taken by the containing loop's induction variable.
+/// An inductive range check is conditional branch in a loop with a condition
+/// that is provably true for some contiguous range of values taken by the
+/// containing loop's induction variable.
 ///
 class InductiveRangeCheck {
 
@@ -233,6 +227,7 @@ class InductiveRangeCheck {
   /// checks, and hence don't end up in \p Checks.
   static void extractRangeChecksFromBranch(
       BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+      std::optional<uint64_t> EstimatedTripCount,
       SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
 };
 
@@ -246,9 +241,10 @@ class InductiveRangeCheckElimination {
       std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
   GetBFIFunc GetBFI;
 
-  // Returns true if it is profitable to do a transform basing on estimation of
-  // number of iterations.
-  bool isProfitableToTransform(const Loop &L);
+  // Returns the estimated number of iterations based on block frequency info if
+  // available, or on branch probability info. Nullopt is returned if the number
+  // of iterations cannot be estimated.
+  std::optional<uint64_t> estimatedTripCount(const Loop &L);
 
 public:
   InductiveRangeCheckElimination(ScalarEvolution &SE,
@@ -522,6 +518,7 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
 
 void InductiveRangeCheck::extractRangeChecksFromBranch(
     BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+    std::optional<uint64_t> EstimatedTripCount,
     SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
   if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
     return;
@@ -529,11 +526,32 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
   unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
   assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
          "No edges coming to loop?");
-  BranchProbability LikelyTaken(15, 16);
 
-  if (!SkipProfitabilityChecks && BPI &&
-      BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
-    return;
+  if (!SkipProfitabilityChecks && BPI) {
+    auto SuccessProbability =
+        BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
+    if (EstimatedTripCount) {
+      auto EstimatedEliminatedChecks =
+          SuccessProbability.scale(*EstimatedTripCount);
+      if (EstimatedEliminatedChecks < MinEliminatedChecks) {
+        LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
+                          << *BI << ": "
+                          << "estimated eliminated checks too low "
+                          << EstimatedEliminatedChecks << "\n";);
+        return;
+      }
+    } else {
+      BranchProbability LikelyTaken(15, 16);
+      if (SuccessProbability < LikelyTaken) {
+        LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
+                          << *BI << ": "
+                          << "could not estimate trip count "
+                          << "and branch success probability too low "
+                          << SuccessProbability << "\n";);
+        return;
+      }
+    }
+  }
 
   // IRCE expects branch's true edge comes to loop. Invert branch for opposite
   // case.
@@ -938,42 +956,34 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
   return getLoopPassPreservedAnalyses();
 }
 
-bool InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L) {
-  if (SkipProfitabilityChecks)
-    return true;
+std::optional<uint64_t>
+InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
   if (GetBFI) {
     BlockFrequencyInfo &BFI = (*GetBFI)();
     uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
     uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
-    if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
-      LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
-                        << "the estimated number of iterations basing on "
-                           "frequency info is " << (hFreq / phFreq) << "\n";);
-      return false;
-    }
-    return true;
+    if (phFreq == 0 || hFreq == 0)
+      return std::nullopt;
+    return {hFreq / phFreq};
   }
 
   if (!BPI)
-    return true;
+    return std::nullopt;
 
   auto *Latch = L.getLoopLatch();
   if (!Latch)
-    return true;
+    return std::nullopt;
   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
   if (!LatchBr)
-    return true;
-  auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
+    return std::nullopt;
 
+  auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
   BranchProbability ExitProbability =
       BPI->getEdgeProbability(Latch, LatchBrExitIdx);
-  if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
-    LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
-                      << "the exit probability is too big " << ExitProbability
-                      << "\n";);
-    return false;
-  }
-  return true;
+  if (ExitProbability.isUnknown() || ExitProbability.isZero())
+    return std::nullopt;
+
+  return {ExitProbability.scaleByInverse(1)};
 }
 
 bool InductiveRangeCheckElimination::run(
@@ -989,8 +999,14 @@ bool InductiveRangeCheckElimination::run(
     return false;
   }
 
-  if (!isProfitableToTransform(*L))
+  auto EstimatedTripCount = estimatedTripCount(*L);
+  if (!SkipProfitabilityChecks && EstimatedTripCount &&
+      *EstimatedTripCount < MinEliminatedChecks) {
+    LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+                      << "the estimated number of iterations is "
+                      << *EstimatedTripCount << "\n");
     return false;
+  }
 
   LLVMContext &Context = Preheader->getContext();
   SmallVector<InductiveRangeCheck, 16> RangeChecks;
@@ -998,8 +1014,8 @@ bool InductiveRangeCheckElimination::run(
 
   for (auto *BBI : L->getBlocks())
     if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
-      InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
-                                                        RangeChecks, Changed);
+      InductiveRangeCheck::extractRangeChecksFromBranch(
+          TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
 
   if (RangeChecks.empty())
     return Changed;
diff --git a/llvm/test/Transforms/IRCE/low-iterations.ll b/llvm/test/Transforms/IRCE/low-iterations.ll
index 071ab4d0156852..e044c455fe6e2b 100644
--- a/llvm/test/Transforms/IRCE/low-iterations.ll
+++ b/llvm/test/Transforms/IRCE/low-iterations.ll
@@ -1,5 +1,5 @@
-; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
-; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
 
 ; CHECK-YES: constrained Loop
 ; CHECK-NO-NOT: constrained Loop
diff --git a/llvm/test/Transforms/IRCE/profitability.ll b/llvm/test/Transforms/IRCE/profitability.ll
new file mode 100644
index 00000000000000..04cea2cfce2fdc
--- /dev/null
+++ b/llvm/test/Transforms/IRCE/profitability.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
+; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
+
+; CHECK-YES: constrained Loop
+; CHECK-NO-NOT: constrained Loop
+
+declare void @bar(i32)
+
+define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
+entry:
+  %len.a = load i32, ptr %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit, !prof !1
+
+loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
+  %abc.a = icmp slt i32 %idx, %len.a
+  br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2
+
+in.bounds.a:
+  %addr.a = getelementptr i32, ptr %arr_a, i32 %idx
+  %val = load i32, ptr %addr.a
+  call void @bar(i32 %val)
+  br label %backedge
+
+backedge:
+  %idx.next = add i32 %idx, 1
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit, !prof !3
+
+exit:
+  ret i32 0
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 1024, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 1}
+!3 = !{!"branch_weights", i32 99, i32 1}



More information about the llvm-commits mailing list