<html><head></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; ">Extra special credit to Craig Silverstein for coming up with the insight that led to this improvement.  I'd been banging my head against this design for a <i>long</i> time!<div><br></div><div>--Owen</div><div><br><div><div>On Nov 18, 2010, at 10:32 AM, Owen Anderson wrote:</div><br class="Apple-interchange-newline"><blockquote type="cite"><div>Author: resistor<br>Date: Thu Nov 18 12:32:40 2010<br>New Revision: 119714<br><br>URL: <a href="http://llvm.org/viewvc/llvm-project?rev=119714&view=rev">http://llvm.org/viewvc/llvm-project?rev=119714&view=rev</a><br>Log:<br>Completely rework the datastructure GVN uses to represent the value number to leader mapping.  Previously,<br>this was a tree of hashtables, and a query recursed into the table for the immediate dominator ad infinitum<br>if the initial lookup failed.  This led to really bad performance on tall, narrow CFGs.<br><br>We can instead replace it with what is conceptually a multimap of value numbers to leaders (actually<br>represented by a hashtable with a list of Value*'s as the value type), and then<br>determine which leader from that set to use very cheaply thanks to the DFS numberings maintained by<br>DominatorTree.  Because there are typically few duplicates of a given value, this scan tends to be<br>quite fast.  Additionally, we use a custom linked list and BumpPtr allocation to avoid any unnecessary<br>allocation in representing the value-side of the multimap.<br><br>This change brings with it a 15% (!) improvement in the total running time of GVN on 403.gcc, which I<br>think is pretty good considering that includes all the "real work" being done by MemDep as well.<br><br>The one downside to this approach is that we can no longer use GVN to perform simple conditional progation,<br>but that seems like an acceptable loss since we now have LVI and CorrelatedValuePropagation to pick up<br>the slack.  If you see conditional propagation that's not happening, please file bugs against LVI or CVP.<br><br>Removed:<br>    llvm/trunk/test/Transforms/GVN/condprop.ll<br>Modified:<br>    llvm/trunk/lib/Transforms/Scalar/GVN.cpp<br><br>Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp<br>URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=119714&r1=119713&r2=119714&view=diff">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=119714&r1=119713&r2=119714&view=diff</a><br>==============================================================================<br>--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)<br>+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Thu Nov 18 12:32:40 2010<br>@@ -40,6 +40,7 @@<br> #include "llvm/Analysis/MemoryBuiltins.h"<br> #include "llvm/Analysis/MemoryDependenceAnalysis.h"<br> #include "llvm/Analysis/PHITransAddr.h"<br>+#include "llvm/Support/Allocator.h"<br> #include "llvm/Support/CFG.h"<br> #include "llvm/Support/CommandLine.h"<br> #include "llvm/Support/Debug.h"<br>@@ -51,6 +52,7 @@<br> #include "llvm/Transforms/Utils/BasicBlockUtils.h"<br> #include "llvm/Transforms/Utils/Local.h"<br> #include "llvm/Transforms/Utils/SSAUpdater.h"<br>+#include <list><br> using namespace llvm;<br><br> STATISTIC(NumGVNInstr,  "Number of instructions deleted");<br>@@ -674,7 +676,45 @@<br>     const TargetData* TD;<br><br>     ValueTable VN;<br>-    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;<br>+    <br>+    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;<br>+    BumpPtrAllocator TableAllocator;<br>+    void insert_table(uint32_t N, Value *V) {<br>+      std::pair<Value*, void*>& Curr = NumberTable[N];<br>+      if (!Curr.first) {<br>+        Curr.first = V;<br>+        return;<br>+      }<br>+      <br>+      std::pair<Value*, void*>* Node =<br>+        TableAllocator.Allocate<std::pair<Value*, void*> >();<br>+      Node->first = V;<br>+      Node->second = Curr.second;<br>+      Curr.second = Node;<br>+    }<br>+    <br>+    void erase_table(uint32_t N, Value *V) {<br>+      std::pair<Value*, void*>* Prev = 0;<br>+      std::pair<Value*, void*>* Curr = &NumberTable[N];<br>+<br>+      while (Curr->first != V) {<br>+        Prev = Curr;<br>+        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);<br>+      }<br>+      <br>+      if (Prev) {<br>+        Prev->second = Curr->second;<br>+      } else {<br>+        if (!Curr->second) {<br>+          Curr->first = 0;<br>+        } else {<br>+          std::pair<Value*, void*>* Next =<br>+            static_cast<std::pair<Value*, void*>*>(Curr->second);<br>+          Curr->first = Next->first;<br>+          Curr->second = Next->second;<br>+        }<br>+      }<br>+    }<br><br>     // List of critical edges to be split between iterations.<br>     SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;<br>@@ -1847,16 +1887,25 @@<br> }<br><br> Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {<br>-  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);<br>-  if (I == localAvail.end())<br>-    return 0;<br>-<br>-  ValueNumberScope *Locals = I->second;<br>-  while (Locals) {<br>-    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);<br>-    if (I != Locals->table.end())<br>-      return I->second;<br>-    Locals = Locals->parent;<br>+  std::pair<Value*, void*> Vals = NumberTable[num];<br>+  if (!Vals.first) return 0;<br>+  Instruction *Inst = dyn_cast<Instruction>(Vals.first);<br>+  if (!Inst) return Vals.first;<br>+  BasicBlock *Parent = Inst->getParent();<br>+  if (DT->dominates(Parent, BB))<br>+    return Inst;<br>+  <br>+  std::pair<Value*, void*>* Next =<br>+    static_cast<std::pair<Value*, void*>*>(Vals.second);<br>+  while (Next) {<br>+    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);<br>+    if (!CurrInst) return Next->first;<br>+    <br>+    BasicBlock *Parent = CurrInst->getParent();<br>+    if (DT->dominates(Parent, BB))<br>+      return CurrInst;<br>+    <br>+    Next = static_cast<std::pair<Value*, void*>*>(Next->second);<br>   }<br><br>   return 0;<br>@@ -1889,7 +1938,7 @@<br><br>     if (!Changed) {<br>       unsigned Num = VN.lookup_or_add(LI);<br>-      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));<br>+      insert_table(Num, LI);<br>     }<br><br>     return Changed;<br>@@ -1898,41 +1947,21 @@<br>   uint32_t NextNum = VN.getNextUnusedValueNumber();<br>   unsigned Num = VN.lookup_or_add(I);<br><br>-  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {<br>-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));<br>-<br>-    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))<br>-      return false;<br>-<br>-    Value *BranchCond = BI->getCondition();<br>-    uint32_t CondVN = VN.lookup_or_add(BranchCond);<br>-<br>-    BasicBlock *TrueSucc = BI->getSuccessor(0);<br>-    BasicBlock *FalseSucc = BI->getSuccessor(1);<br>-<br>-    if (TrueSucc->getSinglePredecessor())<br>-      localAvail[TrueSucc]->table[CondVN] =<br>-        ConstantInt::getTrue(TrueSucc->getContext());<br>-    if (FalseSucc->getSinglePredecessor())<br>-      localAvail[FalseSucc]->table[CondVN] =<br>-        ConstantInt::getFalse(TrueSucc->getContext());<br>-<br>-    return false;<br>-<br>   // Allocations are always uniquely numbered, so we can save time and memory<br>   // by fast failing them.<br>-  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {<br>-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));<br>+  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {<br>+    insert_table(Num, I);<br>     return false;<br>   }<br><br>   if (isa<PHINode>(I)) {<br>-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));<br>+    insert_table(Num, I);<br>+  <br>   // If the number we were assigned was a brand new VN, then we don't<br>   // need to do a lookup to see if the number already exists<br>   // somewhere in the domtree: it can't!<br>   } else if (Num == NextNum) {<br>-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));<br>+    insert_table(Num, I);<br><br>   // Perform fast-path value-number based elimination of values inherited from<br>   // dominators.<br>@@ -1946,7 +1975,7 @@<br>     return true;<br><br>   } else {<br>-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));<br>+    insert_table(Num, I);<br>   }<br><br>   return false;<br>@@ -2095,20 +2124,19 @@<br>         if (P == CurrentBlock) {<br>           NumWithout = 2;<br>           break;<br>-        } else if (!localAvail.count(P))  {<br>+        } else if (!DT->dominates(&F.getEntryBlock(), P))  {<br>           NumWithout = 2;<br>           break;<br>         }<br><br>-        DenseMap<uint32_t, Value*>::iterator predV =<br>-                                            localAvail[P]->table.find(ValNo);<br>-        if (predV == localAvail[P]->table.end()) {<br>+        Value* predV = lookupNumber(P, ValNo);<br>+        if (predV == 0) {<br>           PREPred = P;<br>           ++NumWithout;<br>-        } else if (predV->second == CurInst) {<br>+        } else if (predV == CurInst) {<br>           NumWithout = 2;<br>         } else {<br>-          predMap[P] = predV->second;<br>+          predMap[P] = predV;<br>           ++NumWith;<br>         }<br>       }<br>@@ -2167,7 +2195,7 @@<br>       ++NumGVNPRE;<br><br>       // Update the availability map to include the new instruction.<br>-      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));<br>+      insert_table(ValNo, PREInstr);<br><br>       // Create a PHI to make the value available in this block.<br>       PHINode* Phi = PHINode::Create(CurInst->getType(),<br>@@ -2180,12 +2208,13 @@<br>       }<br><br>       VN.add(Phi, ValNo);<br>-      localAvail[CurrentBlock]->table[ValNo] = Phi;<br>+      insert_table(ValNo, Phi);<br><br>       CurInst->replaceAllUsesWith(Phi);<br>       if (MD && Phi->getType()->isPointerTy())<br>         MD->invalidateCachedPointerInfo(Phi);<br>       VN.erase(CurInst);<br>+      erase_table(ValNo, CurInst);<br><br>       DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');<br>       if (MD) MD->removeInstruction(CurInst);<br>@@ -2217,16 +2246,7 @@<br> /// iterateOnFunction - Executes one iteration of GVN<br> bool GVN::iterateOnFunction(Function &F) {<br>   cleanupGlobalSets();<br>-<br>-  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),<br>-       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {<br>-    if (DI->getIDom())<br>-      localAvail[DI->getBlock()] =<br>-                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);<br>-    else<br>-      localAvail[DI->getBlock()] = new ValueNumberScope(0);<br>-  }<br>-<br>+  <br>   // Top-down walk of the dominator tree<br>   bool Changed = false;<br> #if 0<br>@@ -2246,11 +2266,8 @@<br><br> void GVN::cleanupGlobalSets() {<br>   VN.clear();<br>-<br>-  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator<br>-       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)<br>-    delete I->second;<br>-  localAvail.clear();<br>+  NumberTable.clear();<br>+  TableAllocator.Reset();<br> }<br><br> /// verifyRemoved - Verify that the specified instruction does not occur in our<br>@@ -2260,17 +2277,14 @@<br><br>   // Walk through the value number scope to make sure the instruction isn't<br>   // ferreted away in it.<br>-  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator<br>-         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {<br>-    const ValueNumberScope *VNS = I->second;<br>-<br>-    while (VNS) {<br>-      for (DenseMap<uint32_t, Value*>::const_iterator<br>-             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {<br>-        assert(II->second != Inst && "Inst still in value numbering scope!");<br>-      }<br>-<br>-      VNS = VNS->parent;<br>+  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator<br>+       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {<br>+    std::pair<Value*, void*> const * Node = &I->second;<br>+    assert(Node->first != Inst && "Inst still in value numbering scope!");<br>+    <br>+    while (Node->second) {<br>+      Node = static_cast<std::pair<Value*, void*>*>(Node->second);<br>+      assert(Node->first != Inst && "Inst still in value numbering scope!");<br>     }<br>   }<br> }<br><br>Removed: llvm/trunk/test/Transforms/GVN/condprop.ll<br>URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/condprop.ll?rev=119713&view=auto">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/condprop.ll?rev=119713&view=auto</a><br>==============================================================================<br>--- llvm/trunk/test/Transforms/GVN/condprop.ll (original)<br>+++ llvm/trunk/test/Transforms/GVN/condprop.ll (removed)<br>@@ -1,52 +0,0 @@<br>-; RUN: opt < %s -basicaa -gvn -S | grep {br i1 false}<br>-<br>-@a = external global i32<span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32*> [#uses=7]<br>-<br>-define i32 @foo() nounwind {<br>-entry:<br>-<span class="Apple-tab-span" style="white-space:pre">      </span>%0 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%1 = icmp eq i32 %0, 4<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i1> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>br i1 %1, label %bb, label %bb1<br>-<br>-bb:<span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %entry<br>-<span class="Apple-tab-span" style="white-space:pre"> </span>br label %bb8<br>-<br>-bb1:<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %entry<br>-<span class="Apple-tab-span" style="white-space:pre"> </span>%2 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%3 = icmp eq i32 %2, 5<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i1> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>br i1 %3, label %bb2, label %bb3<br>-<br>-bb2:<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb1<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>br label %bb8<br>-<br>-bb3:<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb1<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>%4 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%5 = icmp eq i32 %4, 4<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i1> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>br i1 %5, label %bb4, label %bb5<br>-<br>-bb4:<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb3<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>%6 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%7 = add i32 %6, 5<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>br label %bb8<br>-<br>-bb5:<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb3<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>%8 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%9 = icmp eq i32 %8, 5<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i1> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>br i1 %9, label %bb6, label %bb7<br>-<br>-bb6:<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb5<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>%10 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>%11 = add i32 %10, 4<span class="Apple-tab-span" style="white-space:pre">        </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>br label %bb8<br>-<br>-bb7:<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb5<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>%12 = load i32* @a, align 4<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>br label %bb8<br>-<br>-bb8:<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb7, %bb6, %bb4, %bb2, %bb<br>-<span class="Apple-tab-span" style="white-space:pre">    </span>%.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ]<span class="Apple-tab-span" style="white-space:pre">   </span><span class="Apple-tab-span" style="white-space:pre">    </span>; <i32> [#uses=1]<br>-<span class="Apple-tab-span" style="white-space:pre">  </span>br label %return<br>-<br>-return:<span class="Apple-tab-span" style="white-space:pre">       </span><span class="Apple-tab-span" style="white-space:pre">    </span>; preds = %bb8<br>-<span class="Apple-tab-span" style="white-space:pre">   </span>ret i32 %.0<br>-}<br><br><br>_______________________________________________<br>llvm-commits mailing list<br><a href="mailto:llvm-commits@cs.uiuc.edu">llvm-commits@cs.uiuc.edu</a><br>http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits<br></div></blockquote></div><br></div></body></html>