[llvm] b99a5eb - [DSE,MemorySSA] Delay PointerMayBeCaptured calls until actually needed.

Mon Aug 24 06:06:12 PDT 2020

Author: Florian Hahn
Date: 2020-08-24T14:05:44+01:00
New Revision: b99a5eb659c1965fc4f25a3020a358cae298ec5f

URL: https://github.com/llvm/llvm-project/commit/b99a5eb659c1965fc4f25a3020a358cae298ec5f
DIFF: https://github.com/llvm/llvm-project/commit/b99a5eb659c1965fc4f25a3020a358cae298ec5f.diff

LOG: [DSE,MemorySSA] Delay PointerMayBeCaptured calls until actually needed.

Avoid computing InvisibleToCallerBefore/AfterRet up front. In most
cases, this information is not really needed. Instead, introduce helper
functions to compute and cache the result on demand.

Notably, this also does not use PointerMayBeCapturedBefore for
isInvisibleToCallerBeforeRet, as it requires the killing MemoryDef as
starting instruction, making the caching ineffective. But it appears the
use of PointerMayBeCapturedBefore has very limited benefits in practice
(e.g. on SPEC2000/SPEC2006/MultiSource there are no binary changes with
-O3 -flto). Refrain from using it for now, to limit-compile-time.

This gives some nice compile-time improvements:
http://llvm-compile-time-tracker.com/compare.php?from=db9345f6810f379a36752dc52caf5230585d0ebd&to=b4d091047e1b8a3d377d200137b79d03aca65663&stat=instructions

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fd3a6623eec6..b0ed22ec3ca2 100644

--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1527,10 +1527,11 @@ struct DSEState {
   SmallPtrSet<MemoryAccess *, 4> SkipStores;
   // Keep track of all of the objects that are invisible to the caller before
   // the function returns.
-  SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+  // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+  DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
   // Keep track of all of the objects that are invisible to the caller after
   // the function returns.
-  SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet;
+  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
   // Keep track of blocks with throwing instructions not modeled in MemorySSA.
   SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
   // Post-order numbers for each basic block. Used to figure out if memory
@@ -1564,26 +1565,6 @@ struct DSEState {
         if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
             (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
           State.MemDefs.push_back(MD);
-
-        // Track whether alloca and alloca-like objects are visible in the
-        // caller before and after the function returns. Alloca objects are
-        // invalid in the caller, so they are neither visible before or after
-        // the function returns.
-        if (isa<AllocaInst>(&I)) {
-          State.InvisibleToCallerBeforeRet.insert(&I);
-          State.InvisibleToCallerAfterRet.insert(&I);
-        }
-
-        // For alloca-like objects we need to check if they are captured before
-        // the function returns and if the return might capture the object.
-        if (isAllocLikeFn(&I, &TLI)) {
-          bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
-          if (!CapturesBeforeRet) {
-            State.InvisibleToCallerBeforeRet.insert(&I);
-            if (!PointerMayBeCaptured(&I, true, false))
-              State.InvisibleToCallerAfterRet.insert(&I);
-          }
-        }
       }
     }
 
@@ -1593,13 +1574,45 @@ struct DSEState {
       if (AI.hasPassPointeeByValueCopyAttr()) {
         // For byval, the caller doesn't know the address of the allocation.
         if (AI.hasByValAttr())
-          State.InvisibleToCallerBeforeRet.insert(&AI);
-        State.InvisibleToCallerAfterRet.insert(&AI);
+          State.InvisibleToCallerBeforeRet.insert({&AI, true});
+        State.InvisibleToCallerAfterRet.insert({&AI, true});
       }
 
     return State;
   }
 
+  bool isInvisibleToCallerAfterRet(const Value *V) {
+    if (isa<AllocaInst>(V))
+      return true;
+    auto I = InvisibleToCallerAfterRet.insert({V, false});
+    if (I.second) {
+      if (!isInvisibleToCallerBeforeRet(V)) {
+        I.first->second = false;
+      } else {
+        auto *Inst = dyn_cast<Instruction>(V);
+        if (Inst && isAllocLikeFn(Inst, &TLI))
+          I.first->second = !PointerMayBeCaptured(V, true, false);
+      }
+    }
+    return I.first->second;
+  }
+
+  bool isInvisibleToCallerBeforeRet(const Value *V) {
+    if (isa<AllocaInst>(V))
+      return true;
+    auto I = InvisibleToCallerBeforeRet.insert({V, false});
+    if (I.second) {
+      auto *Inst = dyn_cast<Instruction>(V);
+      if (Inst && isAllocLikeFn(Inst, &TLI))
+        // NOTE: This could be made more precise by PointerMayBeCapturedBefore
+        // with the killing MemoryDef. But we refrain from doing so for now to
+        // limit compile-time and this does not cause any changes to the number
+        // of stores removed on a large test set in practice.
+        I.first->second = !PointerMayBeCaptured(V, false, true);
+    }
+    return I.first->second;
+  }
+
   Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
     if (!I->mayWriteToMemory())
       return None;
@@ -1762,10 +1775,11 @@ struct DSEState {
   // MemoryDef, return None. The returned value may not (completely) overwrite
   // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse
   // (read).
-  Optional<MemoryAccess *>
-  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
-                  MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet,
-                  bool DefVisibleToCallerAfterRet, unsigned &ScanLimit) {
+  Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
+                                           MemoryAccess *Current,
+                                           MemoryLocation DefLoc,
+                                           const Value *DefUO,
+                                           unsigned &ScanLimit) {
     if (ScanLimit == 0) {
       LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
       return None;
@@ -1798,7 +1812,7 @@ struct DSEState {
 
       // Check if we can skip DomDef for DSE.
       MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
-      if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) {
+      if (DomDef && canSkipDef(DomDef, !isInvisibleToCallerBeforeRet(DefUO))) {
         StepAgain = true;
         Current = DomDef->getDefiningAccess();
       }
@@ -1900,7 +1914,7 @@ struct DSEState {
       //                  stores [0,1]
       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
         if (isCompleteOverwrite(DefLoc, UseInst)) {
-          if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) {
+          if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != DomAccess) {
             BasicBlock *MaybeKillingBlock = UseInst->getParent();
             if (PostOrderNumbers.find(MaybeKillingBlock)->second <
                 PostOrderNumbers.find(DomAccess->getBlock())->second) {
@@ -1918,7 +1932,7 @@ struct DSEState {
     // For accesses to locations visible after the function returns, make sure
     // that the location is killed (=overwritten) along all paths from DomAccess
     // to the exit.
-    if (DefVisibleToCallerAfterRet) {
+    if (!isInvisibleToCallerAfterRet(DefUO)) {
       SmallPtrSet<BasicBlock *, 16> KillingBlocks;
       for (Instruction *KD : KillingDefs)
         KillingBlocks.insert(KD->getParent());
@@ -2038,11 +2052,11 @@ struct DSEState {
   // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
   // throw are handled during the walk from one def to the next.
   bool mayThrowBetween(Instruction *SI, Instruction *NI,
-                       const Value *SILocUnd) const {
+                       const Value *SILocUnd) {
     // First see if we can ignore it by using the fact that SI is an
     // alloca/alloca like object that is not visible to the caller during
     // execution of the function.
-    if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd))
+    if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
       return false;
 
     if (SI->getParent() == NI->getParent())
@@ -2055,10 +2069,10 @@ struct DSEState {
   //  * A memory instruction that may throw and \p SI accesses a non-stack
   //  object.
   //  * Atomic stores stronger that monotonic.
-  bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) const {
+  bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
     // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
     // like object that does not escape.
-    if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd))
+    if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
       return true;
 
     // If NI is an atomic load/store stronger than monotonic, do not try to
@@ -2103,7 +2117,7 @@ struct DSEState {
       // uncommon. If it turns out to be important, we can use
       // getUnderlyingObjects here instead.
       const Value *UO = getUnderlyingObject(DefLoc->Ptr);
-      if (!UO || !InvisibleToCallerAfterRet.count(UO))
+      if (!UO || !isInvisibleToCallerAfterRet(UO))
         continue;
 
       if (isWriteAtEndOfFunction(Def)) {
@@ -2189,18 +2203,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
       continue;
     }
 
-    Instruction *DefObj =
-        const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
-    bool DefVisibleToCallerBeforeRet =
-        !State.InvisibleToCallerBeforeRet.count(SILocUnd);
-    bool DefVisibleToCallerAfterRet =
-        !State.InvisibleToCallerAfterRet.count(SILocUnd);
-    if (DefObj && isAllocLikeFn(DefObj, &TLI)) {
-      if (DefVisibleToCallerBeforeRet)
-        DefVisibleToCallerBeforeRet =
-            PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT);
-    }
-
     MemoryAccess *Current = KillingDef;
     LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                       << *KillingDef << " (" << *SI << ")\n");
@@ -2217,8 +2219,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
         continue;
 
       Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet,
-          DefVisibleToCallerAfterRet, ScanLimit);
+          KillingDef, Current, SILoc, SILocUnd, ScanLimit);
 
       if (!Next) {
         LLVM_DEBUG(dbgs() << "  finished walk\n");

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
index f2bec0c36cd2..fc3e99723d6e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
@@ -184,11 +184,16 @@ exit:
   ret i8* %m
 }
 
-; We can remove the first store 'store i8 0, i8* %m' even though there is a
+; We *could* remove the first store 'store i8 0, i8* %m' even though there is a
 ; throwing instruction between them, because %m escapes after the killing store.
+; But this would require using PointerMayBeCapturedBefore in
+; isInvisibleToCallerBeforeRet, which we currently do not do to limit
+; compile-time, as this appears to hardly ever lead to more stores eliminated
+; in practice.
 define i8* @test_malloc_capture_7() {
 ; CHECK-LABEL: @test_malloc_capture_7(
 ; CHECK-NEXT:    [[M:%.*]] = call i8* @malloc(i64 24)
+; CHECK-NEXT:    store i8 0, i8* [[M]], align 1
 ; CHECK-NEXT:    call void @may_throw()
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit: