[llvm] 3879997 - [IRCE] Do not transform if loop has small number of iterations

Mon Oct 19 20:42:30 PDT 2020

Author: Serguei Katkov
Date: 2020-10-20T10:33:59+07:00
New Revision: 38799975ceb21da0e3d2ce70c3f23ba69adf0269

URL: https://github.com/llvm/llvm-project/commit/38799975ceb21da0e3d2ce70c3f23ba69adf0269
DIFF: https://github.com/llvm/llvm-project/commit/38799975ceb21da0e3d2ce70c3f23ba69adf0269.diff

LOG: [IRCE] Do not transform if loop has small number of iterations

IRCE has some overhead for runtime checks and in case number of iteration is small
the overhead can kill the benefit from optimizations.

This CL bases on BlockFrequencyInfo of pre-header and header to estimate the
number of loop iterations. If it is less than irce-min-estimated-iters we do not transform the loop.

Probably it is better to make more complex cost model but for simplicity it seems the be enough.

The usage of BFI is added only for new pass manager and tries to use it efficiently.

Reviewers: ebrevnov, dantrushin, asbirlea, mkazantsev
Reviewed By: mkazantsev
Subscribers: llvm-commits, fhahn
Differential Revision: https://reviews.llvm.org/D89541

Added: 
    llvm/test/Transforms/IRCE/low-iterations.ll

Modified: 
    llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 30e4822b6769..be7527b3efd1 100644

--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -52,6 +52,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -115,6 +116,9 @@ static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
 static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
                                              cl::Hidden, cl::init(false));
 
+static cl::opt<unsigned> MinRuntimeIterations("min-runtime-iterations",
+                                              cl::Hidden, cl::init(3));
+
 static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                  cl::Hidden, cl::init(true));
 
@@ -235,11 +239,15 @@ class InductiveRangeCheckElimination {
   DominatorTree &DT;
   LoopInfo &LI;
 
+  using GetBFIFunc =
+      llvm::Optional<llvm::function_ref<llvm::BlockFrequencyInfo &()> >;
+  GetBFIFunc GetBFI;
+
 public:
   InductiveRangeCheckElimination(ScalarEvolution &SE,
                                  BranchProbabilityInfo *BPI, DominatorTree &DT,
-                                 LoopInfo &LI)
-      : SE(SE), BPI(BPI), DT(DT), LI(LI) {}
+                                 LoopInfo &LI, GetBFIFunc GetBFI = None)
+      : SE(SE), BPI(BPI), DT(DT), LI(LI), GetBFI(GetBFI) {}
 
   bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
 };
@@ -1772,14 +1780,25 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &BPI = AM.getResult<BranchProbabilityAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
 
-  InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
+  // Get BFI analysis result on demand. Please note that modification of
+  // CFG invalidates this analysis and we should handle it.
+  auto getBFI = [&F, &AM ]()->BlockFrequencyInfo & {
+    return AM.getResult<BlockFrequencyAnalysis>(F);
+  };
+  InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI, { getBFI });
 
   bool Changed = false;
+  {
+    bool CFGChanged = false;
+    for (const auto &L : LI) {
+      CFGChanged |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
+                                 /*PreserveLCSSA=*/false);
+      Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+    }
+    Changed |= CFGChanged;
 
-  for (const auto &L : LI) {
-    Changed |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
-                            /*PreserveLCSSA=*/false);
-    Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
+    if (CFGChanged && !SkipProfitabilityChecks)
+      AM.invalidate<BlockFrequencyAnalysis>(F);
   }
 
   SmallPriorityWorklist<Loop *, 4> Worklist;
@@ -1791,7 +1810,11 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
 
   while (!Worklist.empty()) {
     Loop *L = Worklist.pop_back_val();
-    Changed |= IRCE.run(L, LPMAddNewLoop);
+    if (IRCE.run(L, LPMAddNewLoop)) {
+      Changed = true;
+      if (!SkipProfitabilityChecks)
+        AM.invalidate<BlockFrequencyAnalysis>(F);
+    }
   }
 
   if (!Changed)
@@ -1878,6 +1901,18 @@ bool InductiveRangeCheckElimination::run(
     return false;
   }
   LoopStructure LS = MaybeLoopStructure.getValue();
+  // Profitability check.
+  if (!SkipProfitabilityChecks && GetBFI.hasValue()) {
+    BlockFrequencyInfo &BFI = (*GetBFI)();
+    uint64_t hFreq = BFI.getBlockFreq(LS.Header).getFrequency();
+    uint64_t phFreq = BFI.getBlockFreq(Preheader).getFrequency();
+    if (phFreq != 0 && hFreq != 0 && (hFreq / phFreq < MinRuntimeIterations)) {
+      LLVM_DEBUG(dbgs() << "irce: could not prove profitability: "
+                        << "the estimated number of iterations basing on "
+                           "frequency info is " << (hFreq / phFreq) << "\n";);
+      return false;
+    }
+  }
   const SCEVAddRecExpr *IndVar =
       cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
 

diff  --git a/llvm/test/Transforms/IRCE/low-iterations.ll b/llvm/test/Transforms/IRCE/low-iterations.ll
new file mode 100644
index 000000000000..4d174e9fcbc6
--- /dev/null
+++ b/llvm/test/Transforms/IRCE/low-iterations.ll
@@ -0,0 +1,43 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -min-runtime-iterations=3 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NO
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes=irce -min-runtime-iterations=0 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-YES
+
+; CHECK-YES: constrained Loop
+; CHECK-NO-NOT: constrained Loop
+
+define i32 @multiple_access_no_preloop(
+  i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
+
+  entry:
+  %len.a = load i32, i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit, !prof !1
+
+  loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %backedge ]
+  %idx.next = add i32 %idx, 1
+  %abc.a = icmp slt i32 %idx, %len.a
+  br i1 %abc.a, label %in.bounds.a, label %exit, !prof !2
+
+  in.bounds.a:
+  %addr.a = getelementptr i32, i32* %arr_a, i32 %idx
+  %val = load i32, i32* %addr.a
+  %cond = icmp ne i32 %val, 0
+; Most probable exit from a loop.
+  br i1 %cond, label %found, label %backedge, !prof !3
+
+  backedge:
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit, !prof !4
+
+  found:
+  ret i32 %val
+
+  exit:
+  ret i32 0
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 1024, i32 1}
+!2 = !{!"branch_weights", i32 512, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 2}
+!4 = !{!"branch_weights", i32 512, i32 1}