[llvm] fc539b0 - [SCEV] Infer ranges for SCC consisting of cycled Phis
    Max Kazantsev via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu Feb 17 03:04:01 PST 2022
    
    
  
Author: Max Kazantsev
Date: 2022-02-17T18:03:52+07:00
New Revision: fc539b0004d4fe8072aca00e38599a2300a955ce
URL: https://github.com/llvm/llvm-project/commit/fc539b0004d4fe8072aca00e38599a2300a955ce
DIFF: https://github.com/llvm/llvm-project/commit/fc539b0004d4fe8072aca00e38599a2300a955ce.diff
LOG: [SCEV] Infer ranges for SCC consisting of cycled Phis
Our current strategy of computing ranges of SCEVUnknown Phis was to simply
compute the union of ranges of all its inputs. In order to avoid infinite recursion,
we mark Phis as pending and conservatively return full set for them. As result,
even simplest patterns of cycled phis always have a range of full set.
This patch makes this logic a bit smarter. We basically do the same, but instead
of taking inputs of single Phi we find its strongly connected component (SCC)
and compute the union of all inputs that come into this SCC from outside.
Processing entire SCC together has one more advantage: we can set range for all
of them at once, because the only thing that happens to them is the same value is
being passed between those Phis. So, despite we spend more time analyzing a
single Phi, overall we may save time by not processing other SCC members, so
amortized compile time spent should be approximately the same.
Differential Revision: https://reviews.llvm.org/D110620
Reviewed By: reames
Added: 
    
Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/cycled_phis.ll
    llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
Removed: 
    
################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index c224346442f1f..768925433bed5 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1588,6 +1588,19 @@ class ScalarEvolution {
   /// SCEVUnknowns and thus don't use this mechanism.
   ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U);
 
+  /// Return true and fill \p SCC with elements of PNINode-composed strongly
+  /// connected component that contains \p Phi. Here SCC is a maximum by
+  /// inclusion subgraph composed of Phis that transitively use one another as
+  /// inputs. Otherwise, return false and conservatively put \p Phi into \p SCC
+  /// as the only element of its strongly connected component.
+  bool collectSCC(const PHINode *Phi,
+                  SmallVectorImpl<const PHINode *> &SCC) const;
+
+  /// Sharpen range of entire SCEVUnknown Phi strongly connected component that
+  /// includes \p Phi. On output, \p ConservativeResult is the sharpened range.
+  void sharpenPhiSCCRange(const PHINode *Phi, ConstantRange &ConservativeResult,
+                          ScalarEvolution::RangeSignHint SignHint);
+
   /// We know that there is no SCEV for the specified value.  Analyze the
   /// expression.
   const SCEV *createSCEV(Value *V);
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e1b2b12a4df5b..613379a54a3d4 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -145,6 +145,8 @@ STATISTIC(NumTripCountsNotComputed,
           "Number of loops without predictable loop counts");
 STATISTIC(NumBruteForceTripCountsComputed,
           "Number of loops with trip counts computed by force");
+STATISTIC(NumFoundPhiSCCs,
+          "Number of found Phi-composed strongly connected components");
 
 static cl::opt<unsigned>
 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
@@ -231,6 +233,12 @@ static cl::opt<bool> UseExpensiveRangeSharpening(
     cl::desc("Use more powerful methods of sharpening expression ranges. May "
              "be costly in terms of compile time"));
 
+static cl::opt<unsigned> MaxPhiSCCAnalysisSize(
+    "scalar-evolution-max-scc-analysis-depth", cl::Hidden,
+    cl::desc("Maximum amount of nodes to process while searching SCEVUnknown "
+             "Phi strongly connected components"),
+    cl::init(8));
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -6566,29 +6574,130 @@ ScalarEvolution::getRangeRef(const SCEV *S,
           RangeType);
 
     // A range of Phi is a subset of union of all ranges of its input.
-    if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
-      // Make sure that we do not run over cycled Phis.
-      if (PendingPhiRanges.insert(Phi).second) {
-        ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
-        for (auto &Op : Phi->operands()) {
-          auto OpRange = getRangeRef(getSCEV(Op), SignHint);
-          RangeFromOps = RangeFromOps.unionWith(OpRange);
-          // No point to continue if we already have a full set.
-          if (RangeFromOps.isFullSet())
-            break;
+    if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue()))
+      if (!PendingPhiRanges.count(Phi))
+        sharpenPhiSCCRange(Phi, ConservativeResult, SignHint);
+
+    return setRange(U, SignHint, std::move(ConservativeResult));
+  }
+
+  return setRange(S, SignHint, std::move(ConservativeResult));
+}
+
+bool ScalarEvolution::collectSCC(const PHINode *Phi,
+                                 SmallVectorImpl<const PHINode *> &SCC) const {
+  assert(SCC.empty() && "Precondition: SCC should be empty.");
+  auto Bail = [&]() {
+    SCC.clear();
+    SCC.push_back(Phi);
+    return false;
+  };
+  SmallPtrSet<const PHINode *, 4> Reachable;
+  {
+    // First, find all PHI nodes that are reachable from Phi.
+    SmallVector<const PHINode *, 4> Worklist;
+    Reachable.insert(Phi);
+    Worklist.push_back(Phi);
+    while (!Worklist.empty()) {
+      if (Reachable.size() > MaxPhiSCCAnalysisSize)
+        // Too many nodes to process. Assume that SCC is composed of Phi alone.
+        return Bail();
+      auto *Curr = Worklist.pop_back_val();
+      for (auto &Op : Curr->operands()) {
+        if (auto *PhiOp = dyn_cast<PHINode>(&*Op)) {
+          if (PendingPhiRanges.count(PhiOp))
+            // Do not want to deal with this situation, so conservatively bail.
+            return Bail();
+          if (Reachable.insert(PhiOp).second)
+            Worklist.push_back(PhiOp);
         }
-        ConservativeResult =
-            ConservativeResult.intersectWith(RangeFromOps, RangeType);
-        bool Erased = PendingPhiRanges.erase(Phi);
-        assert(Erased && "Failed to erase Phi properly?");
-        (void) Erased;
       }
     }
+  }
+  {
+    // Out of reachable nodes, find those from which Phi is also reachable. This
+    // defines a SCC.
+    SmallVector<const PHINode *, 4> Worklist;
+    SmallPtrSet<const PHINode *, 4> SCCSet;
+    SCCSet.insert(Phi);
+    SCC.push_back(Phi);
+    Worklist.push_back(Phi);
+    while (!Worklist.empty()) {
+      auto *Curr = Worklist.pop_back_val();
+      for (auto *User : Curr->users())
+        if (auto *PN = dyn_cast<PHINode>(User))
+          if (Reachable.count(PN) && SCCSet.insert(PN).second) {
+            Worklist.push_back(PN);
+            SCC.push_back(PN);
+          }
+    }
+  }
+  return true;
+}
 
-    return setRange(U, SignHint, std::move(ConservativeResult));
+void
+ScalarEvolution::sharpenPhiSCCRange(const PHINode *Phi,
+                                    ConstantRange &ConservativeResult,
+                                    ScalarEvolution::RangeSignHint SignHint) {
+  // Collect strongly connected component (further on - SCC ) composed of Phis.
+  // Analyze all values that are incoming to this SCC (we call them roots).
+  // All SCC elements have range that is not wider than union of ranges of
+  // roots.
+  SmallVector<const PHINode *, 8> SCC;
+  if (collectSCC(Phi, SCC))
+    ++NumFoundPhiSCCs;
+
+  // Collect roots: inputs of SCC nodes that come from outside of SCC.
+  SmallPtrSet<Value *, 4> Roots;
+  const SmallPtrSet<const PHINode *, 8> SCCSet(SCC.begin(), SCC.end());
+  for (auto *PN : SCC)
+    for (auto &Op : PN->operands()) {
+      auto *PhiInput = dyn_cast<PHINode>(Op);
+      if (!PhiInput || !SCCSet.count(PhiInput))
+        Roots.insert(Op);
+    }
+
+  // Mark SCC elements as pending to avoid infinite recursion if there is a
+  // cyclic dependency through some instruction that is not a PHI.
+  for (auto *PN : SCC) {
+    bool Inserted = PendingPhiRanges.insert(PN).second;
+    assert(Inserted && "PHI is already pending?");
+    (void)Inserted;
+  }
+
+  auto BitWidth = ConservativeResult.getBitWidth();
+  ConstantRange RangeFromRoots(BitWidth, /*isFullSet=*/false);
+  for (auto *Root : Roots) {
+    auto OpRange = getRangeRef(getSCEV(Root), SignHint);
+    RangeFromRoots = RangeFromRoots.unionWith(OpRange);
+    // No point to continue if we already have a full set.
+    if (RangeFromRoots.isFullSet())
+      break;
   }
+  ConstantRange::PreferredRangeType RangeType =
+      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? ConstantRange::Unsigned
+                                                       : ConstantRange::Signed;
+  ConservativeResult =
+      ConservativeResult.intersectWith(RangeFromRoots, RangeType);
 
-  return setRange(S, SignHint, std::move(ConservativeResult));
+  DenseMap<const SCEV *, ConstantRange> &Cache =
+      SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+                                                       : SignedRanges;
+  // Entire SCC has the same range.
+  for (auto *PN : SCC) {
+    bool Erased = PendingPhiRanges.erase(PN);
+    assert(Erased && "Failed to erase Phi properly?");
+    (void)Erased;
+    auto *PNSCEV = getSCEV(const_cast<PHINode *>(PN));
+    auto I = Cache.find(PNSCEV);
+    if (I == Cache.end())
+      setRange(PNSCEV, SignHint, ConservativeResult);
+    else {
+      auto SharpenedRange =
+          I->second.intersectWith(ConservativeResult, RangeType);
+      setRange(PNSCEV, SignHint, SharpenedRange);
+    }
+  }
 }
 
 // Given a StartRange, Step and MaxBECount for an expression compute a range of
diff  --git a/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll b/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll
index 7183bb8c0a634..b8ac4a81d74ca 100644
--- a/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll
+++ b/llvm/test/Analysis/ScalarEvolution/cycled_phis.ll
@@ -3,14 +3,13 @@
 
 declare i1 @cond()
 
-; FIXME: Range of phi_1 and phi_2 here can be sharpened to [10, 21).
 define void @test_01() {
 ; CHECK-LABEL: 'test_01'
 ; CHECK-NEXT:  Classifying expressions for: @test_01
 ; CHECK-NEXT:    %phi_1 = phi i32 [ 10, %entry ], [ %phi_2, %loop ]
-; CHECK-NEXT:    --> %phi_1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %phi_1 U: [10,21) S: [10,21) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %phi_2 = phi i32 [ 20, %entry ], [ %phi_1, %loop ]
-; CHECK-NEXT:    --> %phi_2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %phi_2 U: [10,21) S: [10,21) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond = call i1 @cond()
 ; CHECK-NEXT:    --> %cond U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_01
@@ -38,15 +37,15 @@ define void @test_02(i32* %p, i32* %q) {
 ; CHECK-NEXT:    %start = load i32, i32* %p, align 4, !range !0
 ; CHECK-NEXT:    --> %start U: [0,1000) S: [0,1000)
 ; CHECK-NEXT:    %outer_phi = phi i32 [ %start, %entry ], [ %inner_lcssa, %outer_backedge ]
-; CHECK-NEXT:    --> %outer_phi U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
+; CHECK-NEXT:    --> %outer_phi U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:    %inner_phi = phi i32 [ %outer_phi, %outer_loop ], [ %inner_load, %inner_loop ]
-; CHECK-NEXT:    --> %inner_phi U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
+; CHECK-NEXT:    --> %inner_phi U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_load = load i32, i32* %q, align 4, !range !1
 ; CHECK-NEXT:    --> %inner_load U: [2000,3000) S: [2000,3000) Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_cond = call i1 @cond()
 ; CHECK-NEXT:    --> %inner_cond U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_lcssa = phi i32 [ %inner_phi, %inner_loop ]
-; CHECK-NEXT:    --> %inner_lcssa U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
+; CHECK-NEXT:    --> %inner_lcssa U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:    %outer_cond = call i1 @cond()
 ; CHECK-NEXT:    --> %outer_cond U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_02
@@ -80,7 +79,6 @@ exit:
   ret void
 }
 
-; FIXME: All phis should have range [0, 3000)
 define void @test_03(i32* %p, i32* %q) {
 ; CHECK-LABEL: 'test_03'
 ; CHECK-NEXT:  Classifying expressions for: @test_03
@@ -89,15 +87,15 @@ define void @test_03(i32* %p, i32* %q) {
 ; CHECK-NEXT:    %start_2 = load i32, i32* %q, align 4, !range !1
 ; CHECK-NEXT:    --> %start_2 U: [2000,3000) S: [2000,3000)
 ; CHECK-NEXT:    %outer_phi = phi i32 [ %start_1, %entry ], [ %inner_lcssa, %outer_backedge ]
-; CHECK-NEXT:    --> %outer_phi U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
+; CHECK-NEXT:    --> %outer_phi U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:    %inner_phi_1 = phi i32 [ %outer_phi, %outer_loop ], [ %inner_phi_2, %inner_loop ]
-; CHECK-NEXT:    --> %inner_phi_1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
+; CHECK-NEXT:    --> %inner_phi_1 U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_phi_2 = phi i32 [ %start_2, %outer_loop ], [ %inner_phi_1, %inner_loop ]
-; CHECK-NEXT:    --> %inner_phi_2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
+; CHECK-NEXT:    --> %inner_phi_2 U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_cond = call i1 @cond()
 ; CHECK-NEXT:    --> %inner_cond U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner_loop: Variant, %outer_loop: Variant }
 ; CHECK-NEXT:    %inner_lcssa = phi i32 [ %inner_phi_1, %inner_loop ]
-; CHECK-NEXT:    --> %inner_lcssa U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
+; CHECK-NEXT:    --> %inner_lcssa U: [0,3000) S: [0,3000) Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:    %outer_cond = call i1 @cond()
 ; CHECK-NEXT:    --> %outer_cond U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %outer_loop: Variant, %inner_loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_03
diff  --git a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
index 3d2c774507dbc..84428d36d52d6 100644
--- a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
+++ b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
@@ -30,8 +30,6 @@ merge:
 }
 
 define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) {
-; TODO: We could be much smarter here. So far we just make sure that we do not
-;       go into infinite loop analyzing these Phis.
 ; CHECK-LABEL: 'merge_values_with_ranges_looped'
 ; CHECK-NEXT:  Classifying expressions for: @merge_values_with_ranges_looped
 ; CHECK-NEXT:    %len_a = load i32, i32* %a_len_ptr, align 4, !range !0
@@ -39,9 +37,9 @@ define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) {
 ; CHECK-NEXT:    %len_b = load i32, i32* %b_len_ptr, align 4, !range !0
 ; CHECK-NEXT:    --> %len_b U: [0,2147483647) S: [0,2147483647)
 ; CHECK-NEXT:    %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ]
-; CHECK-NEXT:    --> %p1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %p1 U: [0,2147483647) S: [0,2147483647) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ]
-; CHECK-NEXT:    --> %p2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT:    --> %p2 U: [0,2147483647) S: [0,2147483647) Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-NEXT:    --> {0,+,1}<%loop> U: [0,100) S: [0,100) Exits: 99 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, 1
        
    
    
More information about the llvm-commits
mailing list