[llvm] [LoopUnroll] Add CSE to remove redundant loads after unrolling. (PR #83860)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 02:30:06 PDT 2024


================
@@ -209,13 +212,142 @@ static bool isEpilogProfitable(Loop *L) {
   return false;
 }
 
+/// A previously seen load together with the memory generation that was current
+/// when it was recorded.
+struct LoadValue {
+  /// The recorded instruction; null for a default-constructed entry.
+  Instruction *DefI{nullptr};
+  /// Generation counter value at the time DefI was recorded.
+  unsigned Generation{0};
+
+  LoadValue() = default;
+  LoadValue(Instruction *I, unsigned Gen) : DefI{I}, Generation{Gen} {}
+};
+
+/// Bookkeeping for one dominator-tree node on the explicit traversal stack.
+///
+/// Each StackNode holds a ScopedHashTable scope constructed from the table of
+/// available loads (the scope lives exactly as long as the StackNode), the
+/// generation the node was entered with, the generation to hand to its
+/// children, and an iterator range over the node's children.
+class StackNode {
+  // Declared first so it is constructed before, and destroyed after, the
+  // remaining members.
+  ScopedHashTable<const SCEV *, LoadValue>::ScopeTy LoadScope;
+  unsigned CurrentGeneration;
+  unsigned ChildGeneration;
+  DomTreeNode *Node;
+  DomTreeNode::const_iterator ChildIter;
+  DomTreeNode::const_iterator EndIter;
+  bool Processed = false;
+
+public:
+  /// \param AvailableLoads table in which this node's scope is opened.
+  /// \param Gen   initial value for both the current and the child generation.
+  /// \param N     the dominator-tree node this entry describes.
+  /// \param First iterator to the first child still to be visited.
+  /// \param Last  past-the-end iterator of the children.
+  StackNode(ScopedHashTable<const SCEV *, LoadValue> &AvailableLoads,
+            unsigned Gen, DomTreeNode *N, DomTreeNode::const_iterator First,
+            DomTreeNode::const_iterator Last)
+      : LoadScope(AvailableLoads), CurrentGeneration(Gen),
+        ChildGeneration(Gen), Node(N), ChildIter(First), EndIter(Last) {}
+
+  /// Generation this node was created with.
+  unsigned currentGeneration() const { return CurrentGeneration; }
+
+  /// Generation recorded for this node's children.
+  unsigned childGeneration() const { return ChildGeneration; }
+
+  /// Overwrite the generation recorded for this node's children.
+  void childGeneration(unsigned generation) { ChildGeneration = generation; }
+
+  /// The dominator-tree node described by this entry.
+  DomTreeNode *node() { return Node; }
+
+  /// Iterator to the next child that has not been handed out yet.
+  DomTreeNode::const_iterator childIter() const { return ChildIter; }
+
+  /// Return the child at the current position and advance past it. Callers
+  /// must make sure childIter() != end() beforehand; no check is done here.
+  DomTreeNode *nextChild() { return *ChildIter++; }
+
+  /// Past-the-end iterator of this node's children.
+  DomTreeNode::const_iterator end() const { return EndIter; }
+
+  /// Whether process() has been called on this entry.
+  bool isProcessed() const { return Processed; }
+
+  /// Mark this entry as processed.
+  void process() { Processed = true; }
+};
+
+/// Return the instruction recorded in \p LV if it may stand in for \p LI, or
+/// nullptr if it may not.
+///
+/// \param LV                candidate entry looked up for LI's address.
+/// \param LI                the load we would like to replace.
+/// \param CurrentGeneration memory generation at the point of \p LI.
+/// \param GetMSSA           callback yielding MemorySSA, or null if it is not
+///                          available. It is only invoked when the recorded
+///                          generation differs from \p CurrentGeneration.
+///
+/// The candidate is rejected when nothing was recorded, when its type differs
+/// from the type of \p LI, or when the generations differ and MemorySSA cannot
+/// show that the access clobbering \p LI dominates the recorded load's access.
+Value *getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration,
+                        function_ref<MemorySSA *()> GetMSSA) {
+  if (!LV.DefI)
+    return nullptr;
+
+  // Do the cheap type comparison before anything else: a mismatch makes the
+  // candidate unusable regardless of the memory state, so there is no point in
+  // invoking GetMSSA or walking for a clobber first.
+  if (LV.DefI->getType() != LI->getType())
+    return nullptr;
+
+  if (LV.Generation != CurrentGeneration) {
+    // The generation changed since the candidate was recorded, so consult
+    // MemorySSA for a more precise answer.
+    MemorySSA *MSSA = GetMSSA();
+    if (!MSSA)
+      return nullptr;
+    auto *EarlierMA = MSSA->getMemoryAccess(LV.DefI);
+    MemoryAccess *LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LI);
+    // Only reuse the candidate if the access that clobbers LI dominates the
+    // candidate's own memory access.
+    if (!MSSA->dominates(LaterDef, EarlierMA))
+      return nullptr;
+  }
+
+  return LV.DefI;
+}
+
+void loadCSE(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI,
+             function_ref<MemorySSA *()> GetMSSA) {
+  ScopedHashTable<const SCEV *, LoadValue> AvailableLoads;
+  SmallVector<std::unique_ptr<StackNode>> NodesToProcess;
+  DomTreeNode *HeaderD = DT.getNode(L->getHeader());
+  NodesToProcess.emplace_back(new StackNode(AvailableLoads, 0, HeaderD,
+                                            HeaderD->begin(), HeaderD->end()));
+
+  unsigned CurrentGeneration = 0;
+  while (!NodesToProcess.empty()) {
+    // Grab the first item off the stack. Set the current generation, remove
+    // the node from the stack, and process it.
+    StackNode *NodeToProcess = &*NodesToProcess.back();
+
+    // Initialize class members.
+    CurrentGeneration = NodeToProcess->currentGeneration();
+
+    if (!NodeToProcess->isProcessed()) {
+      // Process the node.
+
+      // If this block has a single predecessor, then the predecessor is the
+      // parent of the domtree node and all of the live out memory values are
+      // still current in this block.  If this block has multiple predecessors,
+      // then they could have invalidated the live-out memory values of our
+      // parent value.  For now, just be conservative and invalidate memory if
+      // this block has multiple predecessors.
+      if (!NodeToProcess->node()->getBlock()->getSinglePredecessor())
+        ++CurrentGeneration;
+      for (auto &I : make_early_inc_range(*NodeToProcess->node()->getBlock())) {
+
+        auto *Load = dyn_cast<LoadInst>(&I);
+        if (!Load || !Load->isSimple()) {
+          if (I.mayWriteToMemory())
+            CurrentGeneration++;
+          continue;
+        }
+
+        const SCEV *PtrSCEV = SE.getSCEV(Load->getPointerOperand());
+        LoadValue LV = AvailableLoads.lookup(PtrSCEV);
+        if (Value *M = getMatchingValue(LV, Load, CurrentGeneration, GetMSSA)) {
+
+          if (LI.replacementPreservesLCSSAForm(Load, M)) {
+            Load->replaceAllUsesWith(M);
+            Load->eraseFromParent();
+          }
+        } else {
+          AvailableLoads.insert(PtrSCEV, LoadValue(Load, CurrentGeneration));
+        }
+      }
+      NodeToProcess->childGeneration(CurrentGeneration);
+      NodeToProcess->process();
+    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+      // Push the next child onto the stack.
+      DomTreeNode *child = NodeToProcess->nextChild();
----------------
fhahn wrote:

Fixed, thanks!

https://github.com/llvm/llvm-project/pull/83860


More information about the llvm-commits mailing list