[llvm] 296671f - [LoopUnroll] Store more information in UnrollCostEstimator (NFCI)

Wed Sep 27 03:06:38 PDT 2023

Author: Nikita Popov
Date: 2023-09-27T12:06:13+02:00
New Revision: 296671f059198185bedc3eba80056ebf4bc7ad30

URL: https://github.com/llvm/llvm-project/commit/296671f059198185bedc3eba80056ebf4bc7ad30
DIFF: https://github.com/llvm/llvm-project/commit/296671f059198185bedc3eba80056ebf4bc7ad30.diff

LOG: [LoopUnroll] Store more information in UnrollCostEstimator (NFCI)

Instead of having ApproximateLoopSize() use a bunch of out parameters,
from which we later construct an UnrollCostEstimator, directly
construct UnrollCostEstimator which holds all the information
derived from loop analysis. This makes it easier to add additional
metrics in the future.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 4f3010965b591df..e8b03f81b348305 100644

--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -99,17 +99,6 @@ LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
 bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                           DependenceInfo &DI, LoopInfo &LI);
 
-bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
-                        DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC,
-                        ScalarEvolution &SE,
-                        const SmallPtrSetImpl<const Value *> &EphValues,
-                        OptimizationRemarkEmitter *ORE, unsigned TripCount,
-                        unsigned MaxTripCount, bool MaxOrZero,
-                        unsigned TripMultiple, unsigned LoopSize,
-                        TargetTransformInfo::UnrollingPreferences &UP,
-                        TargetTransformInfo::PeelingPreferences &PP,
-                        bool &UseUpperBound);
-
 void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
                              ScalarEvolution *SE, DominatorTree *DT,
                              AssumptionCache *AC,
@@ -126,9 +115,44 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
     std::optional<bool> UserUpperBound,
     std::optional<unsigned> UserFullUnrollMaxCount);
 
-InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
-    bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI,
-    const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns);
+/// Produce an estimate of the unrolled cost of the specified loop.  This
+/// is used to a) produce a cost estimate for partial unrolling and b) to
+/// cheaply estimate cost for full unrolling when we don't want to symbolically
+/// evaluate all iterations.
+class UnrollCostEstimator {
+  InstructionCost LoopSize;
+  bool NotDuplicatable;
+
+public:
+  unsigned NumInlineCandidates;
+  bool Convergent;
+
+  UnrollCostEstimator(const Loop *L, const TargetTransformInfo &TTI,
+                      const SmallPtrSetImpl<const Value *> &EphValues,
+                      unsigned BEInsns);
+
+  /// Whether it is legal to unroll this loop.
+  bool canUnroll() const { return LoopSize.isValid() && !NotDuplicatable; }
+
+  uint64_t getRolledLoopSize() const { return *LoopSize.getValue(); }
+
+  /// Returns loop size estimation for unrolled loop, given the unrolling
+  /// configuration specified by UP.
+  uint64_t
+  getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
+                      unsigned CountOverwrite = 0) const;
+};
+
+bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+                        DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC,
+                        ScalarEvolution &SE,
+                        const SmallPtrSetImpl<const Value *> &EphValues,
+                        OptimizationRemarkEmitter *ORE, unsigned TripCount,
+                        unsigned MaxTripCount, bool MaxOrZero,
+                        unsigned TripMultiple, const UnrollCostEstimator &UCE,
+                        TargetTransformInfo::UnrollingPreferences &UP,
+                        TargetTransformInfo::PeelingPreferences &PP,
+                        bool &UseUpperBound);
 
 } // end namespace llvm
 

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8a5f4cb1b85ff9c..fc15c565ca890f3 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3520,20 +3520,16 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
     }
   }
 
-  unsigned NumInlineCandidates;
-  bool NotDuplicatable;
-  bool Convergent;
-  InstructionCost LoopSizeIC =
-      ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
-                          TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");
+  UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
 
   // Loop is not unrollable if the loop contains certain instructions.
-  if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
+  if (UCE.canUnroll() || UCE.Convergent) {
     LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
     return 1;
   }
-  unsigned LoopSize = *LoopSizeIC.getValue();
+
+  unsigned LoopSize = UCE.getRolledLoopSize();
+  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
 
   // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
   // be able to use it.
@@ -3544,7 +3540,7 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
 
   bool UseUpperBound = false;
   computeUnrollCount(L, TTI, DT, &LI, &AC, SE, EphValues, &ORE, TripCount,
-                     MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
+                     MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
                      UseUpperBound);
   unsigned Factor = UP.Count;
   LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 9c6e4ebf62a9c5f..79599726bc1eec3 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -153,9 +153,11 @@ static bool computeUnrollAndJamCount(
     LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,
     const SmallPtrSetImpl<const Value *> &EphValues,
     OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
-    unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
-    unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP,
+    unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,
+    unsigned InnerTripCount, unsigned InnerLoopSize,
+    TargetTransformInfo::UnrollingPreferences &UP,
     TargetTransformInfo::PeelingPreferences &PP) {
+  unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
   // First up use computeUnrollCount from the loop unroller to get a count
   // for unrolling the outer loop, plus any loops requiring explicit
   // unrolling we leave to the unroller. This uses UP.Threshold /
@@ -165,7 +167,7 @@ static bool computeUnrollAndJamCount(
   bool UseUpperBound = false;
   bool ExplicitUnroll = computeUnrollCount(
     L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
-      /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, PP,
+      /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,
       UseUpperBound);
   if (ExplicitUnroll || UseUpperBound) {
     // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
@@ -318,39 +320,28 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
   }
 
   // Approximate the loop size and collect useful info
-  unsigned NumInlineCandidates;
-  bool NotDuplicatable;
-  bool Convergent;
   SmallPtrSet<const Value *, 32> EphValues;
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
   Loop *SubLoop = L->getSubLoops()[0];
-  InstructionCost InnerLoopSizeIC =
-      ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
-                          Convergent, TTI, EphValues, UP.BEInsns);
-  InstructionCost OuterLoopSizeIC =
-      ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
-                          TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterLoopSizeIC << "\n");
-  LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSizeIC << "\n");
-
-  if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+  UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);
+  UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
+
+  if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains instructions"
-                      << " with invalid cost.\n");
+                      << " which cannot be duplicated or have invalid cost.\n");
     return LoopUnrollResult::Unmodified;
   }
-  unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
-  unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
 
-  if (NotDuplicatable) {
-    LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable "
-                         "instructions.\n");
-    return LoopUnrollResult::Unmodified;
-  }
-  if (NumInlineCandidates != 0) {
+  unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();
+  unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
+  LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterLoopSize << "\n");
+  LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSize << "\n");
+
+  if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
     return LoopUnrollResult::Unmodified;
   }
-  if (Convergent) {
+  if (InnerUCE.Convergent || OuterUCE.Convergent) {
     LLVM_DEBUG(
         dbgs() << "  Not unrolling loop with convergent instructions.\n");
     return LoopUnrollResult::Unmodified;
@@ -379,7 +370,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
   // Decide if, and by how much, to unroll
   bool IsCountSetExplicitly = computeUnrollAndJamCount(
     L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
-      OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP, PP);
+      OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
   if (UP.Count <= 1)
     return LoopUnrollResult::Unmodified;
   // Unroll factor (Count) must be less or equal to TripCount.

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 335b489d3cb25ba..446aa497026d3fb 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -662,19 +662,16 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
            unsigned(*RolledDynamicCost.getValue())}};
 }
 
-/// ApproximateLoopSize - Approximate the size of the loop.
-InstructionCost llvm::ApproximateLoopSize(
-    const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
-    const TargetTransformInfo &TTI,
+UnrollCostEstimator::UnrollCostEstimator(
+    const Loop *L, const TargetTransformInfo &TTI,
     const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
   CodeMetrics Metrics;
   for (BasicBlock *BB : L->blocks())
     Metrics.analyzeBasicBlock(BB, TTI, EphValues);
-  NumCalls = Metrics.NumInlineCandidates;
+  NumInlineCandidates = Metrics.NumInlineCandidates;
   NotDuplicatable = Metrics.notDuplicatable;
   Convergent = Metrics.convergent;
-
-  InstructionCost LoopSize = Metrics.NumInsts;
+  LoopSize = Metrics.NumInsts;
 
   // Don't allow an estimate of size zero.  This would allows unrolling of loops
   // with huge iteration counts, which is a compile time problem even if it's
@@ -685,8 +682,17 @@ InstructionCost llvm::ApproximateLoopSize(
   if (LoopSize.isValid() && LoopSize < BEInsns + 1)
     // This is an open coded max() on InstructionCost
     LoopSize = BEInsns + 1;
+}
 
-  return LoopSize;
+uint64_t UnrollCostEstimator::getUnrolledLoopSize(
+    const TargetTransformInfo::UnrollingPreferences &UP,
+    unsigned CountOverwrite) const {
+  unsigned LS = *LoopSize.getValue();
+  assert(LS >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
+  if (CountOverwrite)
+    return static_cast<uint64_t>(LS - UP.BEInsns) * CountOverwrite + UP.BEInsns;
+  else
+    return static_cast<uint64_t>(LS - UP.BEInsns) * UP.Count + UP.BEInsns;
 }
 
 // Returns the loop hint metadata node with the given name (for example,
@@ -746,32 +752,6 @@ static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
     return MaxPercentThresholdBoost;
 }
 
-// Produce an estimate of the unrolled cost of the specified loop.  This
-// is used to a) produce a cost estimate for partial unrolling and b) to
-// cheaply estimate cost for full unrolling when we don't want to symbolically
-// evaluate all iterations.
-class UnrollCostEstimator {
-  const unsigned LoopSize;
-
-public:
-  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
-
-  // Returns loop size estimation for unrolled loop, given the unrolling
-  // configuration specified by UP.
-  uint64_t
-  getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
-                      const unsigned CountOverwrite = 0) const {
-    assert(LoopSize >= UP.BEInsns &&
-           "LoopSize should not be less than BEInsns!");
-    if (CountOverwrite)
-      return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
-             UP.BEInsns;
-    else
-      return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
-             UP.BEInsns;
-  }
-};
-
 static std::optional<unsigned>
 shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
                    const unsigned TripMultiple, const unsigned TripCount,
@@ -888,14 +868,14 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
 // refactored into it own function.
 bool llvm::computeUnrollCount(
     Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
-    AssumptionCache *AC,
-    ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+    AssumptionCache *AC, ScalarEvolution &SE,
+    const SmallPtrSetImpl<const Value *> &EphValues,
     OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
-    bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
+    bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE,
     TargetTransformInfo::UnrollingPreferences &UP,
     TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
 
-  UnrollCostEstimator UCE(*L, LoopSize);
+  unsigned LoopSize = UCE.getRolledLoopSize();
 
   const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
   const bool PragmaFullUnroll = hasUnrollFullPragma(L);
@@ -1177,9 +1157,6 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     return LoopUnrollResult::Unmodified;
 
   bool OptForSize = L->getHeader()->getParent()->hasOptSize();
-  unsigned NumInlineCandidates;
-  bool NotDuplicatable;
-  bool Convergent;
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
       L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
       ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
@@ -1196,30 +1173,22 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   SmallPtrSet<const Value *, 32> EphValues;
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
 
-  InstructionCost LoopSizeIC =
-      ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
-                          TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSizeIC << "\n");
-
-  if (!LoopSizeIC.isValid()) {
+  UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
+  if (!UCE.canUnroll()) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains instructions"
-                      << " with invalid cost.\n");
+                      << " which cannot be duplicated or have invalid cost.\n");
     return LoopUnrollResult::Unmodified;
   }
-  unsigned LoopSize = *LoopSizeIC.getValue();
 
-  if (NotDuplicatable) {
-    LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
-                      << " instructions.\n");
-    return LoopUnrollResult::Unmodified;
-  }
+  unsigned LoopSize = UCE.getRolledLoopSize();
+  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
 
   // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold
   // later), to (fully) unroll loops, if it does not increase code size.
   if (OptForSize)
     UP.Threshold = std::max(UP.Threshold, LoopSize + 1);
 
-  if (NumInlineCandidates != 0) {
+  if (UCE.NumInlineCandidates != 0) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
     return LoopUnrollResult::Unmodified;
   }
@@ -1261,7 +1230,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   // Assuming n is the same on all threads, any kind of unrolling is
   // safe.  But currently llvm's notion of convergence isn't powerful
   // enough to express this.
-  if (Convergent)
+  if (UCE.Convergent)
     UP.AllowRemainder = false;
 
   // Try to find the trip count upper bound if we cannot find the exact trip
@@ -1277,8 +1246,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
   // fully unroll the loop.
   bool UseUpperBound = false;
   bool IsCountSetExplicitly = computeUnrollCount(
-    L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
-      TripMultiple, LoopSize, UP, PP, UseUpperBound);
+      L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
+      MaxOrZero, TripMultiple, UCE, UP, PP, UseUpperBound);
   if (!UP.Count)
     return LoopUnrollResult::Unmodified;