[llvm] [LoopUnroll] Structural cost savings analysis for full loop unrolling (PR #114579)

Fri Dec 20 09:51:37 PST 2024

================
@@ -337,8 +336,239 @@ struct PragmaInfo {
   const bool PragmaEnableUnroll;
 };
 
+/// Helper type to estimate per-iteration cost savings coming from fully
+/// unrolling a loop.
+///
+/// The analysis maintains a set of "known instructions" inside the loop (i.e.,
+/// instructions whose result will be statically known after loop unrolling)
+/// that we assume will be entirely removable if the loop is fully unrolled.
+/// These instructions' cost can be deducted from the unrolled cost when
+/// comparing against a threshold.
+struct FullUnrollCostSavings {
+  FullUnrollCostSavings(const Loop *L) : L(L) {}
+
+  /// Returns whether the instruction is known.
+  inline bool isKnown(const Instruction *I) const {
+    return KnownVals.contains(I);
+  }
+
+  /// If the value is an instruction, returns whether that instruction is known,
+  /// false otherwise.
+  bool isKnown(const Value *V) const {
+    if (const Instruction *I = dyn_cast<Instruction>(V))
+      return isKnown(I);
+    return false;
+  }
+
+  /// Adds an instruction to the known set and re-evaluates unknown instructions
+  /// in the loop to determine whether their result can now be known.
+  void addToKnown(const Instruction *I) {
+    if (!KnownVals.insert(I).second)
+      return;
+
+    // Every time we assume knowledge of an additional instruction result, we
+    // potentially need to revisit instructions that were previously seen as
+    // unoptimizable.
+    Evaluated.clear();
+
+    addUsersToExploreSet(I);
+    while (ToEvaluate.size()) {
+      const Instruction *I = ToEvaluate.back();
+      ToEvaluate.pop_back();
+      evalInstruction(I);
+    }
+  }
+
+  /// Returns the cost savings from all known instructions, as estimated by \p
+  /// TTI.
+  InstructionCost computeSavings(const TargetTransformInfo &TTI) const {
+    TargetTransformInfo::TargetCostKind CostKind =
+        L->getHeader()->getParent()->hasMinSize()
+            ? TargetTransformInfo::TCK_CodeSize
+            : TargetTransformInfo::TCK_SizeAndLatency;
+
+    InstructionCost CostSavings;
+    for (const Value *Val : KnownVals)
+      CostSavings += TTI.getInstructionCost(cast<Instruction>(Val), CostKind);
+    return CostSavings;
+  }
+
+private:
+  /// Set of instructions inside the loop whose results are considered known.
+  SmallPtrSet<const Instruction *, 4> KnownVals;
+  /// Caches the set of instructions we have already evaluated when adding a new
+  /// instruction to the known set.
+  SmallPtrSet<const Instruction *, 4> Evaluated;
+  /// Stack of instructions to evaluate when adding a new instruction to the
+  /// known set.
+  SmallVector<const Instruction *, 4> ToEvaluate;
+  /// The loop under consideration.
+  const Loop *L;
+
+  /// Adds all value users to the stack of instructions to evaluate, if they
+  /// have not been evaluated already.
+  void addUsersToExploreSet(const Value *Val) {
+    for (const User *U : Val->users()) {
+      if (const Instruction *I = dyn_cast<Instruction>(U))
+        if (!Evaluated.contains(I))
+          ToEvaluate.push_back(I);
+    }
+  }
+
+  /// Evaluates an instruction to determine whether its result is "known", and
+  /// returns whether that is the case. This may recurse on operands that are
+  /// the result of yet unevaluated instructions inside the loop.
+  bool evalInstruction(const Instruction *I) {
+    Evaluated.insert(I);
+    if (isKnown(I))
+      return true;
+    if (!isa<BinaryOperator, CastInst, CmpInst>(I))
+      return false;
----------------
lucas-rami wrote:

I may be too restrictive indeed. My rationale is that I only want to count instructions that would be folded by unrolling because all of their operands would be constants. I may just need to filter out instructions with side effects, phis, and branches?

https://github.com/llvm/llvm-project/pull/114579