[llvm] r293258 - NewGVN: Add basic dead and redundant store elimination

Daniel Berlin via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 26 18:37:12 PST 2017


Author: dannyb
Date: Thu Jan 26 20:37:11 2017
New Revision: 293258

URL: http://llvm.org/viewvc/llvm-project?rev=293258&view=rev
Log:
NewGVN: Add basic dead and redundant store elimination

Summary:
This adds basic dead and redundant store elimination to
NewGVN.  Unlike our current DSE, it will happily do cross-block DSE if
it meets our requirements.

We get a bunch of DSE's simple.ll cases, and some stuff it doesn't.
Unlike DSE, however, we only try to eliminate stores of the same value
to the same memory location, not just general stores to the same
memory location.

Reviewers: davide

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D29149

Added:
    llvm/trunk/test/Transforms/NewGVN/deadstore.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp
    llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll

Modified: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp?rev=293258&r1=293257&r2=293258&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp Thu Jan 26 20:37:11 2017
@@ -108,6 +108,7 @@ STATISTIC(NumGVNAvoidedSortedLeaderChang
           "Number of avoided sorted leader changes");
 STATISTIC(NumGVNNotMostDominatingLeader,
           "Number of times a member dominated it's new classes' leader");
+STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
 
 //===----------------------------------------------------------------------===//
 //                                GVN Pass
@@ -362,6 +363,7 @@ private:
                                      CongruenceClass *);
   bool setMemoryAccessEquivTo(MemoryAccess *From, CongruenceClass *To);
   MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const;
+  bool isMemoryAccessTop(const MemoryAccess *) const;
 
   // Reachability handling.
   void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -371,8 +373,10 @@ private:
 
   // Elimination.
   struct ValueDFS;
-  void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
+  void convertDenseToDFSOrdered(const CongruenceClass::MemberSet &,
                                 SmallVectorImpl<ValueDFS> &);
+  void convertDenseToLoadsAndStores(const CongruenceClass::MemberSet &,
+                                    SmallVectorImpl<ValueDFS> &);
 
   bool eliminateInstructions(Function &);
   void replaceInstruction(Instruction *, Value *);
@@ -733,6 +737,13 @@ MemoryAccess *NewGVN::lookupMemoryAccess
   return MA;
 }
 
+// Return true if the MemoryAccess is really equivalent to everything. This is
+// equivalent to the lattice value "TOP" in most lattices.  This is the initial
+// state of all memory accesses.
+bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+  return MemoryAccessToClass.lookup(MA) == InitialClass;
+}
+
 LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
                                              LoadInst *LI, MemoryAccess *DA,
                                              const BasicBlock *B) {
@@ -786,13 +797,17 @@ const Expression *NewGVN::performSymboli
   // are simple and avoid value numbering them.
   auto *SI = cast<StoreInst>(I);
   MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
-  // See if we are defined by a previous store expression, it already has a
-  // value, and it's the same value as our current store. FIXME: Right now, we
-  // only do this for simple stores, we should expand to cover memcpys, etc.
+  // Get the expression, if any, for the RHS of the MemoryDef.
+  MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
+      cast<MemoryDef>(StoreAccess)->getDefiningAccess());
+  // If we are defined by ourselves, use the live on entry def.
+  if (StoreRHS == StoreAccess)
+    StoreRHS = MSSA->getLiveOnEntryDef();
+
   if (SI->isSimple()) {
-    // Get the expression, if any, for the RHS of the MemoryDef.
-    MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
-        cast<MemoryDef>(StoreAccess)->getDefiningAccess());
+    // See if we are defined by a previous store expression, it already has a
+    // value, and it's the same value as our current store. FIXME: Right now, we
+    // only do this for simple stores, we should expand to cover memcpys, etc.
     const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
     CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
     // Basically, check if the congruence class the store is in is defined by a
@@ -803,8 +818,18 @@ const Expression *NewGVN::performSymboli
     if (CC &&
         CC->RepStoredValue == lookupOperandLeader(SI->getValueOperand(), SI, B))
       return createStoreExpression(SI, StoreRHS, B);
+    // Also check if our value operand is defined by a load of the same memory
+    // location, and the memory state is the same as it was then
+    // (otherwise, it could have been overwritten later. See test32 in
+    // transforms/DeadStoreElimination/simple.ll)
+    if (LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand())) {
+      if ((lookupOperandLeader(LI->getPointerOperand(), LI, LI->getParent()) ==
+           lookupOperandLeader(SI->getPointerOperand(), SI, B)) &&
+          (lookupMemoryAccessEquiv(
+               MSSA->getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS))
+        return createVariableExpression(LI);
+    }
   }
-
   return createStoreExpression(SI, StoreAccess, B);
 }
 
@@ -1531,14 +1556,20 @@ void NewGVN::updateProcessedCount(Value
 void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
   // If all the arguments are the same, the MemoryPhi has the same value as the
   // argument.
-  // Filter out unreachable blocks from our operands.
+  // Filter out unreachable blocks and self phis from our operands.
   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
-    return ReachableBlocks.count(MP->getIncomingBlock(U));
+    return lookupMemoryAccessEquiv(cast<MemoryAccess>(U)) != MP &&
+           !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+           ReachableBlocks.count(MP->getIncomingBlock(U));
   });
-
-  assert(Filtered.begin() != Filtered.end() &&
-         "We should not be processing a MemoryPhi in a completely "
-         "unreachable block");
+  // If all that is left is nothing, our memoryphi is undef. We keep it as
+  // InitialClass.  Note: The only case this should happen is if we have at
+  // least one self-argument.
+  if (Filtered.begin() == Filtered.end()) {
+    if (setMemoryAccessEquivTo(MP, InitialClass))
+      markMemoryUsersTouched(MP);
+    return;
+  }
 
   // Transform the remaining operands into operand leaders.
   // FIXME: mapped_iterator should have a range version.
@@ -1951,8 +1982,10 @@ struct NewGVN::ValueDFS {
   }
 };
 
+// This function converts the set of members for a congruence class from values,
+// to sets of defs and uses with associated DFS info.
 void NewGVN::convertDenseToDFSOrdered(
-    CongruenceClass::MemberSet &Dense,
+    const CongruenceClass::MemberSet &Dense,
     SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
   for (auto D : Dense) {
     // First add the value.
@@ -1961,7 +1994,6 @@ void NewGVN::convertDenseToDFSOrdered(
     // we should only be left with instructions as members.
     assert(BB && "Should have figured out a basic block for value");
     ValueDFS VD;
-
     DomTreeNode *DomNode = DT->getNode(BB);
     VD.DFSIn = DomNode->getDFSNumIn();
     VD.DFSOut = DomNode->getDFSNumOut();
@@ -1974,7 +2006,6 @@ void NewGVN::convertDenseToDFSOrdered(
       VD.Val = D;
     }
 
-    // If it's an instruction, use the real local dfs number.
     if (auto *I = dyn_cast<Instruction>(D))
       VD.LocalNum = InstrDFS.lookup(I);
     else
@@ -2013,6 +2044,32 @@ void NewGVN::convertDenseToDFSOrdered(
   }
 }
 
+// This function converts the set of members for a congruence class from values,
+// to the set of defs for loads and stores, with associated DFS info.
+void NewGVN::convertDenseToLoadsAndStores(
+    const CongruenceClass::MemberSet &Dense,
+    SmallVectorImpl<ValueDFS> &LoadsAndStores) {
+  for (auto D : Dense) {
+    if (!isa<LoadInst>(D) && !isa<StoreInst>(D))
+      continue;
+
+    BasicBlock *BB = getBlockForValue(D);
+    ValueDFS VD;
+    DomTreeNode *DomNode = DT->getNode(BB);
+    VD.DFSIn = DomNode->getDFSNumIn();
+    VD.DFSOut = DomNode->getDFSNumOut();
+    VD.Val = D;
+
+    // If it's an instruction, use the real local dfs number.
+    if (auto *I = dyn_cast<Instruction>(D))
+      VD.LocalNum = InstrDFS.lookup(I);
+    else
+      llvm_unreachable("Should have been an instruction");
+
+    LoadsAndStores.emplace_back(VD);
+  }
+}
+
 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
   // Patch the replacement so that it is not more restrictive than the value
   // being replaced.
@@ -2167,6 +2224,10 @@ bool NewGVN::eliminateInstructions(Funct
   }
 
   for (CongruenceClass *CC : CongruenceClasses) {
+    // Track the equivalent store info so we can decide whether to try
+    // dead store elimination.
+    SmallVector<ValueDFS, 8> PossibleDeadStores;
+
     // FIXME: We should eventually be able to replace everything still
     // in the initial class with undef, as they should be unreachable.
     // Right now, initial still contains some things we skip value
@@ -2183,15 +2244,12 @@ bool NewGVN::eliminateInstructions(Funct
     if (alwaysAvailable(Leader)) {
       SmallPtrSet<Value *, 4> MembersLeft;
       for (auto M : CC->Members) {
-
         Value *Member = M;
-
         // Void things have no uses we can replace.
         if (Member == CC->RepLeader || Member->getType()->isVoidTy()) {
           MembersLeft.insert(Member);
           continue;
         }
-
         DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " << *Member
                      << "\n");
         // Due to equality propagation, these may not always be
@@ -2208,7 +2266,6 @@ bool NewGVN::eliminateInstructions(Funct
         }
       }
       CC->Members.swap(MembersLeft);
-
     } else {
       DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
       // If this is a singleton, we can skip it.
@@ -2226,20 +2283,15 @@ bool NewGVN::eliminateInstructions(Funct
 
         // Sort the whole thing.
         std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
-
         for (auto &VD : DFSOrderedSet) {
           int MemberDFSIn = VD.DFSIn;
           int MemberDFSOut = VD.DFSOut;
           Value *Member = VD.Val;
           Use *MemberUse = VD.U;
 
-          if (Member) {
-            // We ignore void things because we can't get a value from them.
-            // FIXME: We could actually use this to kill dead stores that are
-            // dominated by equivalent earlier stores.
-            if (Member->getType()->isVoidTy())
-              continue;
-          }
+          // We ignore void things because we can't get a value from them.
+          if (Member && Member->getType()->isVoidTy())
+            continue;
 
           if (EliminationStack.empty()) {
             DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -2326,6 +2378,39 @@ bool NewGVN::eliminateInstructions(Funct
       MembersLeft.insert(Member);
     }
     CC->Members.swap(MembersLeft);
+
+    // If we have possible dead stores to look at, try to eliminate them.
+    if (CC->StoreCount > 0) {
+      convertDenseToLoadsAndStores(CC->Members, PossibleDeadStores);
+      std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end());
+      ValueDFSStack EliminationStack;
+      for (auto &VD : PossibleDeadStores) {
+        int MemberDFSIn = VD.DFSIn;
+        int MemberDFSOut = VD.DFSOut;
+        Instruction *Member = cast<Instruction>(VD.Val);
+        if (EliminationStack.empty() ||
+            !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) {
+          // Sync to our current scope.
+          EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
+          if (EliminationStack.empty()) {
+            EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+            continue;
+          }
+        }
+        // We already did load elimination, so nothing to do here.
+        if (isa<LoadInst>(Member))
+          continue;
+        assert(!EliminationStack.empty());
+        Instruction *Leader = cast<Instruction>(EliminationStack.back());
+        assert(DT->dominates(Leader->getParent(), Member->getParent()));
+        // Member is dominater by Leader, and thus dead
+        DEBUG(dbgs() << "Marking dead store " << *Member
+                     << " that is dominated by " << *Leader << "\n");
+        markInstructionForDeletion(Member);
+        CC->Members.erase(Member);
+        ++NumGVNDeadStores;
+      }
+    }
   }
 
   return AnythingReplaced;

Modified: llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll?rev=293258&r1=293257&r2=293258&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll (original)
+++ llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll Thu Jan 26 20:37:11 2017
@@ -169,7 +169,6 @@ define i32 @vnum_test3(i32* %data) #0 {
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]]
 ; CHECK:       bb11:
-; CHECK-NEXT:    store i32 0, i32* [[TMP9]], align 4
 ; CHECK-NEXT:    br label [[BB14]]
 ; CHECK:       bb14:
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
@@ -228,6 +227,53 @@ bb23:
   ret i32 %p.0
 }
 
+;; This is an irreducible test case that will cause a memoryphi node loop
+;; in the two block.
+;; it's equivalent to something like
+;; *a = 0
+;; if (<....>) goto loopmiddle
+;; loopstart:
+;; loopmiddle:
+;; load *a
+;; *a = 0
+;; if (<....>) goto loopstart otherwise goto loopend
+;; loopend:
+;; load *a
+;; add the results of the loads
+;; return them
+;;
+;; Both loads should equal 0, but it requires being
+;; completely optimistic about MemoryPhis, otherwise
+;; we will not be able to see through the cycle.
+define i8 @quux(i8* noalias %arg, i8* noalias %arg2) {
+; CHECK-LABEL: @quux(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store i8 0, i8* [[ARG:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i8 0
+;
+bb:
+  store i8 0, i8 *%arg
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb2, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp2 = load i8, i8* %arg
+  store i8 0, i8 *%arg
+  br i1 undef, label %bb1, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = load i8, i8* %arg
+  %tmp3 = add i8 %tmp, %tmp2
+  ret i8 %tmp3
+}
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.ident = !{!0, !0, !0}

Added: llvm/trunk/test/Transforms/NewGVN/deadstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/deadstore.ll?rev=293258&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/deadstore.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/deadstore.ll Thu Jan 26 20:37:11 2017
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+;; Most of these are borrowed from transforms/DSE/simple.ll
+;; NewGVN should be able to eliminate any stores of the same value that are actually redundnat.
+
+;; tmp5 is store of the same value to the same location as the load.
+define void @test12({ i32, i32 }* %x) nounwind  {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 0, [[TMP8]]
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP7]], align 4
+; CHECK-NEXT:    ret void
+;
+  %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
+  %tmp5 = load i32, i32* %tmp4, align 4
+  %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp17 = sub i32 0, %tmp8
+  store i32 %tmp5, i32* %tmp4, align 4
+  store i32 %tmp17, i32* %tmp7, align 4
+  ret void
+}
+; Remove redundant store if loaded value is in another block.
+define i32 @test26(i1 %c, i32* %p) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  store i32 %v, i32* %p, align 4
+  br label %bb3
+bb3:
+  ret i32 0
+}
+
+declare void @unknown_func()
+; Remove redundant store, which is in the same loop as the load.
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %bb1
+bb1:
+  %v = load i32, i32* %p, align 4
+  br label %bb2
+bb2:
+  store i32 %v, i32* %p, align 4
+  ; Might read and overwrite value at %p, but doesn't matter.
+  call void @unknown_func()
+  br i1 undef, label %bb1, label %bb3
+bb3:
+  ret i32 0
+}




More information about the llvm-commits mailing list