[llvm] [IRCE] Relax profitability check (PR #104659)

Jan Ječmen via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 05:16:58 PDT 2024


https://github.com/JanJecmen updated https://github.com/llvm/llvm-project/pull/104659

>From f16553c7521137ebefebf8512484ed5782ac9640 Mon Sep 17 00:00:00 2001
From: Jan Jecmen <jjecmen at azul.com>
Date: Thu, 19 Sep 2024 10:04:20 +0000
Subject: [PATCH] [IRCE] Relax profitability check

IRCE currently has two profitability checks:
  1. min number of iterations (10 by default)
  2. branch is highly biased (> 15/16)
However, it may still be profitable to eliminate range checks even
if the branch isn't as biased. Consider, for example, a loop with
100 iterations, where IRCE currently eliminates all 100 range checks.
The same range checks, if performed in a loop with 200 iterations,
are not eliminated because their branch is now only 1:1.

This patch proposes to relax the profitability checks of IRCE.
Namely, instead of the two checks currently in place, consider IRCE
profitable if the branch probability scaled by the expected number
of iterations (i.e., the estimated number of eliminated checks)
is over a threshold. This covers the minimum number of iterations check
(there are at least as many iterations as eliminated range checks),
and changes the bias check from a percent of iterations to at least
a constant threshold of eliminated checks.
The effect is shown in the new test `profitability.ll`. The loop has
100 iterations (the backedge is taken 99:1). The range
check's branch weights are 1:1, so current IRCE wouldn't even
consider this a range check. However, with the new implementation,
setting the minimum eliminated checks as high as 50, the
transformation is still applied.

If the number of iterations can't be estimated, the check falls back
to the current 15/16 likelihood check.
---
 .../Scalar/InductiveRangeCheckElimination.cpp | 105 ++++++++++--------
 llvm/test/Transforms/IRCE/low-iterations.ll   |   4 +-
 llvm/test/Transforms/IRCE/profitability.ll    |  38 +++++++
 3 files changed, 101 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/Transforms/IRCE/profitability.ll

diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 104e8ceb796700..a61daaf07f7bc8 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -107,8 +107,8 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
 static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
                                              cl::Hidden, cl::init(false));
 
-static cl::opt<unsigned> MinRuntimeIterations("irce-min-runtime-iterations",
-                                              cl::Hidden, cl::init(10));
+static cl::opt<unsigned> MinEliminatedChecks("irce-min-eliminated-checks",
+                                             cl::Hidden, cl::init(10));
 
 static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                  cl::Hidden, cl::init(true));
@@ -132,15 +132,9 @@ static cl::opt<bool>
 
 namespace {
 
-/// An inductive range check is conditional branch in a loop with
-///
-///  1. a very cold successor (i.e. the branch jumps to that successor very
-///     rarely)
-///
-///  and
-///
-///  2. a condition that is provably true for some contiguous range of values
-///     taken by the containing loop's induction variable.
+/// An inductive range check is conditional branch in a loop with a condition
+/// that is provably true for some contiguous range of values taken by the
+/// containing loop's induction variable.
 ///
 class InductiveRangeCheck {
 
@@ -235,6 +229,7 @@ class InductiveRangeCheck {
   /// checks, and hence don't end up in \p Checks.
   static void extractRangeChecksFromBranch(
       BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+      std::optional<uint64_t> EstimatedTripCount,
       SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
 };
 
@@ -248,9 +243,10 @@ class InductiveRangeCheckElimination {
       std::optional<llvm::function_ref<llvm::BlockFrequencyInfo &()>>;
   GetBFIFunc GetBFI;
 
-  // Returns true if it is profitable to do a transform basing on estimation of
-  // number of iterations.
-  bool isProfitableToTransform(const Loop &L, LoopStructure &LS);
+  // Returns the estimated number of iterations based on block frequency info if
+  // available, or on branch probability info. Nullopt is returned if the number
+  // of iterations cannot be estimated.
+  std::optional<uint64_t> estimatedTripCount(const Loop &L);
 
 public:
   InductiveRangeCheckElimination(ScalarEvolution &SE,
@@ -524,6 +520,7 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
 
 void InductiveRangeCheck::extractRangeChecksFromBranch(
     BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+    std::optional<uint64_t> EstimatedTripCount,
     SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
   if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
     return;
@@ -531,11 +528,32 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
   unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
   assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
          "No edges coming to loop?");
-  BranchProbability LikelyTaken(15, 16);
 
-  if (!SkipProfitabilityChecks && BPI &&
-      BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
-    return;
+  if (!SkipProfitabilityChecks && BPI) {
+    auto SuccessProbability =
+        BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc);
+    if (EstimatedTripCount) {
+      auto EstimatedEliminatedChecks =
+          SuccessProbability.scale(*EstimatedTripCount);
+      if (EstimatedEliminatedChecks < MinEliminatedChecks) {
+        LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
+                          << *BI << ": "
+                          << "estimated eliminated checks too low "
+                          << EstimatedEliminatedChecks << "\n";);
+        return;
+      }
+    } else {
+      BranchProbability LikelyTaken(15, 16);
+      if (SuccessProbability < LikelyTaken) {
+        LLVM_DEBUG(dbgs() << "irce: could not prove profitability for branch "
+                          << *BI << ": "
+                          << "could not estimate trip count "
+                          << "and branch success probability too low "
+                          << SuccessProbability << "\n";);
+        return;
+      }
+    }
+  }
 
   // IRCE expects branch's true edge comes to loop. Invert branch for opposite
   // case.
@@ -940,35 +958,35 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
   return getLoopPassPreservedAnalyses();
 }
 
-bool
-InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
-                                                        LoopStructure &LS) {
-  if (SkipProfitabilityChecks)
-    return true;
+std::optional<uint64_t>
+InductiveRangeCheckElimination::estimatedTripCount(const Loop &L) {
   if (GetBFI) {
     BlockFrequencyInfo &BFI = (*GetBFI)();
-    uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
+    uint64_t hFreq = BFI.getBlockFreq(L.getHeader()).getFrequency();
     uint64_t phFreq = BFI.getBlockFreq(L.getLoopPreheader()).getFrequency();
-    if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
-      LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
-                        << "the estimated number of iterations basing on "
-                           "frequency info is " << (hFreq / phFreq) << "\n";);
-      return false;
-    }
-    return true;
+    if (phFreq == 0 || hFreq == 0)
+      return std::nullopt;
+    return {hFreq / phFreq};
   }
 
   if (!BPI)
-    return true;
+    return std::nullopt;
+
+  auto *Latch = L.getLoopLatch();
+  if (!Latch)
+    return std::nullopt;
+
+  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBr)
+    return std::nullopt;
+
+  auto LatchBrExitIdx = LatchBr->getSuccessor(0) == L.getHeader() ? 1 : 0;
   BranchProbability ExitProbability =
-      BPI->getEdgeProbability(LS.Latch, LS.LatchBrExitIdx);
-  if (ExitProbability > BranchProbability(1, MinRuntimeIterations)) {
-    LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
-                      << "the exit probability is too big " << ExitProbability
-                      << "\n";);
-    return false;
-  }
-  return true;
+      BPI->getEdgeProbability(Latch, LatchBrExitIdx);
+  if (ExitProbability.isUnknown() || ExitProbability.isZero())
+    return std::nullopt;
+
+  return {ExitProbability.scaleByInverse(1)};
 }
 
 bool InductiveRangeCheckElimination::run(
@@ -988,10 +1006,11 @@ bool InductiveRangeCheckElimination::run(
   SmallVector<InductiveRangeCheck, 16> RangeChecks;
   bool Changed = false;
 
+  auto EstimatedTripCount = estimatedTripCount(*L);
   for (auto *BBI : L->getBlocks())
     if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
-      InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
-                                                        RangeChecks, Changed);
+      InductiveRangeCheck::extractRangeChecksFromBranch(
+          TBI, L, SE, BPI, EstimatedTripCount, RangeChecks, Changed);
 
   if (RangeChecks.empty())
     return Changed;
@@ -1019,8 +1038,6 @@ bool InductiveRangeCheckElimination::run(
     return Changed;
   }
   LoopStructure LS = *MaybeLoopStructure;
-  if (!isProfitableToTransform(*L, LS))
-    return Changed;
   const SCEVAddRecExpr *IndVar =
       cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
 
diff --git a/llvm/test/Transforms/IRCE/low-iterations.ll b/llvm/test/Transforms/IRCE/low-iterations.ll
index 071ab4d0156852..e044c455fe6e2b 100644
--- a/llvm/test/Transforms/IRCE/low-iterations.ll
+++ b/llvm/test/Transforms/IRCE/low-iterations.ll
@@ -1,5 +1,5 @@
-; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
-; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
 
 ; CHECK-YES: constrained Loop
 ; CHECK-NO-NOT: constrained Loop
diff --git a/llvm/test/Transforms/IRCE/profitability.ll b/llvm/test/Transforms/IRCE/profitability.ll
new file mode 100644
index 00000000000000..04cea2cfce2fdc
--- /dev/null
+++ b/llvm/test/Transforms/IRCE/profitability.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=51 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
+; RUN: opt -S -verify-loop-info -irce-print-changed-loops -passes=irce -irce-min-eliminated-checks=50 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
+
+; CHECK-YES: constrained Loop
+; CHECK-NO-NOT: constrained Loop
+
+declare void @bar(i32)
+
+define i32 @foo(ptr %arr_a, ptr %a_len_ptr, i32 %n) {
+entry:
+  %len.a = load i32, ptr %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit, !prof !1
+
+loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
+  %abc.a = icmp slt i32 %idx, %len.a
+  br i1 %abc.a, label %in.bounds.a, label %backedge, !prof !2
+
+in.bounds.a:
+  %addr.a = getelementptr i32, ptr %arr_a, i32 %idx
+  %val = load i32, ptr %addr.a
+  call void @bar(i32 %val)
+  br label %backedge
+
+backedge:
+  %idx.next = add i32 %idx, 1
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit, !prof !3
+
+exit:
+  ret i32 0
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 1024, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 1}
+!3 = !{!"branch_weights", i32 99, i32 1}



More information about the llvm-commits mailing list