[llvm] [GVN] Speed up GVN by caching DT nodes (PR #112903)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 18 07:11:17 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Lewis Crawford (LewisCrawford)

<details>
<summary>Changes</summary>

Speed up GVN for very large methods where a lot of time is spent in BasicBlock->DomTreeNode lookup.

This lookup to:
```
  DenseMapBase<llvm::DenseMap<
                 llvm::BasicBlock*,
                 std::unique_ptr<llvm::DomTreeNodeBase...
```
is not an inherently expensive operation, but appears to happen with frequency > O(N).

Caching the DTNodes can save ~15% compilation time in a large synthetic pathalogical case, and 1-2% in other more representative examples of large programs. Impact is likely to be negligible on smaller programs, so there is an option added to disable this optimization.

---
Full diff: https://github.com/llvm/llvm-project/pull/112903.diff


2 Files Affected:

- (modified) llvm/include/llvm/Transforms/Scalar/GVN.h (+9) 
- (modified) llvm/lib/Transforms/Scalar/GVN.cpp (+31-1) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index be6c0ec5edab07..47ad95bca1948e 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -227,6 +227,7 @@ class GVNPass : public PassInfoMixin<GVNPass> {
   ImplicitControlFlowTracking *ICF = nullptr;
   LoopInfo *LI = nullptr;
   MemorySSAUpdater *MSSAU = nullptr;
+  bool DomTreeCacheValid;
 
   ValueTable VN;
 
@@ -237,6 +238,8 @@ class GVNPass : public PassInfoMixin<GVNPass> {
     struct LeaderTableEntry {
       Value *Val;
       const BasicBlock *BB;
+      // Cache domtree node in LeaderTableEntry to avoid having to look it up.
+      DomTreeNode *DTNode;
     };
 
   private:
@@ -246,6 +249,7 @@ class GVNPass : public PassInfoMixin<GVNPass> {
     };
     DenseMap<uint32_t, LeaderListNode> NumToLeaders;
     BumpPtrAllocator TableAllocator;
+    DominatorTree *DT;
 
   public:
     class leader_iterator {
@@ -291,6 +295,7 @@ class GVNPass : public PassInfoMixin<GVNPass> {
       NumToLeaders.clear();
       TableAllocator.Reset();
     }
+    void setDomTree(DominatorTree *D) { DT = D; }
   };
   LeaderMap LeaderTable;
 
@@ -381,6 +386,10 @@ class GVNPass : public PassInfoMixin<GVNPass> {
   void addDeadBlock(BasicBlock *BB);
   void assignValNumForDeadCode();
   void assignBlockRPONumber(Function &F);
+
+  // DT node cache-related routines
+  DomTreeNode *getDomTreeNode(const LeaderMap::LeaderTableEntry &Vals);
+  void invalidateDTCache();
 };
 
 /// Create a legacy GVN pass. This also allows parameterizing whether or not
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 2ba600497e00d3..c52425fd034b59 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -135,6 +135,10 @@ static cl::opt<uint32_t> MaxNumInsnsPerBlock(
     cl::desc("Max number of instructions to scan in each basic block in GVN "
              "(default = 100)"));
 
+static cl::opt<bool>
+    EnableDomTreeCache("gvn-dom-cache", cl::init(true), cl::Hidden,
+                       cl::desc("enable caching of dom tree nodes"));
+
 struct llvm::GVNPass::Expression {
   uint32_t opcode;
   bool commutative = false;
@@ -735,6 +739,7 @@ void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
   if (!Curr.Entry.Val) {
     Curr.Entry.Val = V;
     Curr.Entry.BB = BB;
+    Curr.Entry.DTNode = DT->getNode(BB);
     return;
   }
 
@@ -742,6 +747,7 @@ void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
   Node->Entry.Val = V;
   Node->Entry.BB = BB;
   Node->Next = Curr.Next;
+  Node->Entry.DTNode = DT->getNode(BB);
   Curr.Next = Node;
 }
 
@@ -771,6 +777,7 @@ void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
       Curr->Entry.Val = Next->Entry.Val;
       Curr->Entry.BB = Next->Entry.BB;
       Curr->Next = Next->Next;
+      Curr->Entry.DTNode = Next->Entry.DTNode;
     }
   }
 }
@@ -827,6 +834,7 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  DomTreeCacheValid = true;
   bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
                          MSSA ? &MSSA->getMSSA() : nullptr);
   if (!Changed)
@@ -2382,6 +2390,14 @@ void GVNPass::ValueTable::eraseTranslateCacheEntry(
     PhiTranslateTable.erase({Num, Pred});
 }
 
+DomTreeNode *GVNPass::getDomTreeNode(const LeaderMap::LeaderTableEntry &Vals) {
+  if (EnableDomTreeCache && DomTreeCacheValid) {
+    assert(DT->getNode(Vals.BB) == Vals.DTNode);
+    return Vals.DTNode;
+  }
+  return DT->getNode(Vals.BB);
+}
+
 // In order to find a leader for a given value number at a
 // specific basic block, we first obtain the list of all Values for that number,
 // and then scan the list to find one whose block dominates the block in
@@ -2393,8 +2409,10 @@ Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
     return nullptr;
 
   Value *Val = nullptr;
+  DomTreeNode *BBDTNode = DT->getNode(BB);
+
   for (const auto &Entry : Leaders) {
-    if (DT->dominates(Entry.BB, BB)) {
+    if (DT->dominates(getDomTreeNode(Entry), BBDTNode)) {
       Val = Entry.Val;
       if (isa<Constant>(Val))
         return Val;
@@ -2762,8 +2780,10 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
   AC = &RunAC;
   DT = &RunDT;
   VN.setDomTree(DT);
+  LeaderTable.setDomTree(DT);
   TLI = &RunTLI;
   VN.setAliasAnalysis(&RunAA);
+  DomTreeCacheValid = true;
   MD = RunMD;
   ImplicitControlFlowTracking ImplicitCFT;
   ICF = &ImplicitCFT;
@@ -3137,9 +3157,19 @@ BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
       MD->invalidateCachedPredecessors();
     InvalidBlockRPONumbers = true;
   }
+  invalidateDTCache();
   return BB;
 }
 
+void GVNPass::invalidateDTCache() {
+  // It is possible to refresh the leader table here but there are several
+  // reasons not to do so:
+  // 1. Invalidation is rare and usually occurs w/ PRE splitting critical edges.
+  // 2. In cases where invalidation happens, it can happen hundreds of times.
+  if (!EnableDomTreeCache)
+    return;
+  DomTreeCacheValid = false;
+}
 /// Split critical edges found during the previous
 /// iteration that may enable further optimization.
 bool GVNPass::splitCriticalEdges() {

``````````

</details>


https://github.com/llvm/llvm-project/pull/112903


More information about the llvm-commits mailing list