[llvm] cf9ee49 - [DSE] Lift post-dominance for objs not accessible in caller.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 15 03:40:10 PDT 2020


Author: Florian Hahn
Date: 2020-04-15T11:37:14+01:00
New Revision: cf9ee49b4d7f2e1341b84f941ad55cce1b16722d

URL: https://github.com/llvm/llvm-project/commit/cf9ee49b4d7f2e1341b84f941ad55cce1b16722d
DIFF: https://github.com/llvm/llvm-project/commit/cf9ee49b4d7f2e1341b84f941ad55cce1b16722d.diff

LOG: [DSE] Lift post-dominance for objs not accessible in caller.

We can eliminate MemoryDefs of objects not accessible after the function
returns (e.g. alloca), if there are no reads between the MemoryDef and
any function exits. We can stop traversing paths that completely
overwrite the memory location of the MemoryDef.

This patch was split off D73763.

Reviewers: dmgreen, bryant, asbirlea, Tyker, efriedma, george.burgess.iv

Reviewed By: asbirlea, george.burgess.iv

Differential Revision: https://reviews.llvm.org/D77736

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f8a9f4f9bcc1..53ae373d68cd 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1484,9 +1484,12 @@ struct DSEState {
   SmallVector<MemoryDef *, 64> MemDefs;
   // Any that should be skipped as they are already deleted
   SmallPtrSet<MemoryAccess *, 4> SkipStores;
-  // Keep track of all of the objects that are invisible to the caller until the
-  // function returns.
-  SmallPtrSet<const Value *, 16> InvisibleToCaller;
+  // Keep track of all of the objects that are invisible to the caller before
+  // the function returns.
+  SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
+  // Keep track of all of the objects that are invisible to the caller after
+  // the function returns.
+  SmallPtrSet<const Value *, 16> InvisibleToCallerAfterRet;
   // Keep track of blocks with throwing instructions not modeled in MemorySSA.
   SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
   // Post-order numbers for each basic block. Used to figure out if memory
@@ -1520,13 +1523,25 @@ struct DSEState {
             hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
           State.MemDefs.push_back(MD);
 
-        // Track alloca and alloca-like objects. Here we care about objects not
-        // visible to the caller during function execution. Alloca objects are
-        // invalid in the caller, for alloca-like objects we ensure that they
-        // are not captured throughout the function.
-        if (isa<AllocaInst>(&I) ||
-            (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)))
-          State.InvisibleToCaller.insert(&I);
+        // Track whether alloca and alloca-like objects are visible in the
+        // caller before and after the function returns. Alloca objects are
+        // invalid in the caller, so they are neither visible before or after
+        // the function returns.
+        if (isa<AllocaInst>(&I)) {
+          State.InvisibleToCallerBeforeRet.insert(&I);
+          State.InvisibleToCallerAfterRet.insert(&I);
+        }
+
+        // For alloca-like objects we need to check if they are captured before
+        // the function returns and if the return might capture the object.
+        if (isAllocLikeFn(&I, &TLI)) {
+          bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true);
+          if (!CapturesBeforeRet) {
+            State.InvisibleToCallerBeforeRet.insert(&I);
+            if (!PointerMayBeCaptured(&I, true, false))
+              State.InvisibleToCallerAfterRet.insert(&I);
+          }
+        }
       }
     }
 
@@ -1534,7 +1549,7 @@ struct DSEState {
     // dead at the end of the function.
     for (Argument &AI : F.args())
       if (AI.hasByValOrInAllocaAttr())
-        State.InvisibleToCaller.insert(&AI);
+        State.InvisibleToCallerBeforeRet.insert(&AI);
     return State;
   }
 
@@ -1603,14 +1618,15 @@ struct DSEState {
   }
 
   // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
-  // read access in between or return None otherwise. The returned value may not
-  // (completely) overwrite \p DefLoc. Currently we bail out when we encounter
-  // an aliasing MemoryUse (read).
-  Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
-                                           MemoryAccess *Current,
-                                           MemoryLocation DefLoc,
-                                           bool DefVisibleToCaller,
-                                           int &ScanLimit) const {
+  // read access between them or on any other path to a function exit block if
+  // \p DefLoc is not accessible after the function returns. If there is no such
+  // MemoryDef, return None. The returned value may not (completely) overwrite
+  // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse
+  // (read).
+  Optional<MemoryAccess *>
+  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
+                  MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet,
+                  bool DefVisibleToCallerAfterRet, int &ScanLimit) const {
     MemoryAccess *DomAccess;
     bool StepAgain;
     LLVM_DEBUG(dbgs() << "  trying to get dominating access for " << *Current
@@ -1636,12 +1652,13 @@ struct DSEState {
       if (isa<MemoryPhi>(DomAccess))
         break;
 
-      // Check if we can skip DomDef for DSE. We also require the KillingDef
-      // execute whenever DomDef executes and use post-dominance to ensure that.
-
+      // Check if we can skip DomDef for DSE. For accesses to objects that are
+      // accessible after the function returns, KillingDef must execute whenever
+      // DomDef executes and use post-dominance to ensure that.
       MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
-      if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller)) ||
-          !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) {
+      if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) ||
+          (DefVisibleToCallerAfterRet &&
+           !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) {
         StepAgain = true;
         Current = DomDef->getDefiningAccess();
       }
@@ -1764,7 +1781,7 @@ struct DSEState {
     // First see if we can ignore it by using the fact that SI is an
     // alloca/alloca like object that is not visible to the caller during
     // execution of the function.
-    if (SILocUnd && InvisibleToCaller.count(SILocUnd))
+    if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd))
       return false;
 
     if (SI->getParent() == NI->getParent())
@@ -1782,7 +1799,7 @@ struct DSEState {
                     MemoryLocation &NILoc) const {
     // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
     // like object that does not escape.
-    if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd))
+    if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd))
       return true;
 
     if (NI->isAtomic()) {
@@ -1823,10 +1840,15 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
     const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
     Instruction *DefObj =
         const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
-    bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd);
-    if (DefObj && ((isAllocLikeFn(DefObj, &TLI) &&
-                    !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT))))
-      DefVisibleToCaller = false;
+    bool DefVisibleToCallerBeforeRet =
+        !State.InvisibleToCallerBeforeRet.count(SILocUnd);
+    bool DefVisibleToCallerAfterRet =
+        !State.InvisibleToCallerAfterRet.count(SILocUnd);
+    if (DefObj && isAllocLikeFn(DefObj, &TLI)) {
+      if (DefVisibleToCallerBeforeRet)
+        DefVisibleToCallerBeforeRet =
+            PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT);
+    }
 
     MemoryAccess *Current = KillingDef;
     LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
@@ -1844,7 +1866,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
         continue;
 
       Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, DefVisibleToCaller, ScanLimit);
+          KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet,
+          DefVisibleToCallerAfterRet, ScanLimit);
 
       if (!Next) {
         LLVM_DEBUG(dbgs() << "  finished walk\n");

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
index 741508b9cec7..115540e54a26 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll
@@ -13,7 +13,7 @@ define void @test1(i1 %c) {
 ; CHECK:       cond.true.i.i.i:
 ; CHECK-NEXT:    ret void
 ; CHECK:       cond.end.i.i.i:
-; CHECK-NEXT:    [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam() #2
+; CHECK-NEXT:    [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam()
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL_I_I_I_I_I]] to i64*
 ; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I]], i8 0, i64 undef, i1 false)
 ; CHECK-NEXT:    store i64 0, i64* [[TMP0]], align 8
@@ -42,7 +42,7 @@ define void @test2(i1 %c) {
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[CLEANUP_CONT104:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[MUL_I_I_I_I:%.*]] = shl nuw nsw i64 undef, 3
-; CHECK-NEXT:    [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm() #2
+; CHECK-NEXT:    [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm()
 ; CHECK-NEXT:    [[DOTCAST_I_I:%.*]] = bitcast i8* [[CALL_I_I_I_I_I_I131]] to i64*
 ; CHECK-NEXT:    store i64 0, i64* [[DOTCAST_I_I]], align 8
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I_I131]], i8 0, i64 [[MUL_I_I_I_I]], i1 false)

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
index cc35d50195d4..20b2d281570a 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 
-; XFAIL: *
-; TODO: Handling of free not implemented yet.
-
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -308,7 +305,7 @@ declare %struct.NodePtrVecStruct* @NodePtrVec_new(i32)
 define noalias %struct.SystemCallMapElementStruct* @SystemCallMapElement_new(i8* nocapture readonly %label, i32 %initialSize) {
 ; CHECK-LABEL: @SystemCallMapElement_new(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #7
+; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #6
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.SystemCallMapElementStruct*
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_THEN:%.*]]
@@ -380,7 +377,7 @@ cleanup:                                          ; preds = %entry, %if.end, %if
 define noalias %struct.BitfieldStruct* @Bitfield_new(i32 %bitsNeeded) {
 ; CHECK-LABEL: @Bitfield_new(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #7
+; CHECK-NEXT:    [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #6
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.BitfieldStruct*
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -388,7 +385,7 @@ define noalias %struct.BitfieldStruct* @Bitfield_new(i32 %bitsNeeded) {
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[BITSNEEDED:%.*]], 7
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[ADD]], 8
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #7
 ; CHECK-NEXT:    [[BITFIELD:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[BITFIELD]] to i8**
 ; CHECK-NEXT:    store i8* [[CALL1]], i8** [[TMP1]], align 8

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
index af45ec2fb710..4500c31477e1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
@@ -126,7 +126,8 @@ define void @alloca_1(i1 %c) {
 ; CHECK-NEXT:    [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
 ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
@@ -162,7 +163,8 @@ define void @alloca_2(i1 %c) {
 ; CHECK-NEXT:    [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32*
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
 ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
index a5d7b24c8a81..4e61ade72c06 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
@@ -136,10 +136,11 @@ bb5:
 
 ; Tests where the pointer/object is *NOT* accessible after the function returns.
 
+; The store in the entry block can be eliminated, because it is overwritten
+; on all paths to the exit.
 define void @alloca_1(i1 %c1) {
 ; CHECK-LABEL: @alloca_1(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    store i32 0, i32* [[P]]
@@ -167,10 +168,11 @@ bb5:
   ret void
 }
 
+; The store in the entry block can be eliminated, because it is overwritten
+; on all paths to the exit.
 define void @alloca_2(i1 %c.1, i1 %c.2) {
 ; CHECK-LABEL: @alloca_2(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
 ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    store i32 0, i32* [[P]]
@@ -211,6 +213,8 @@ bb5:
   ret void
 }
 
+; The store in the entry block cannot be eliminated. There's a path from the
+; first store to the read in bb5, where the location is not overwritten.
 define void @alloca_3(i1 %c1) {
 ; CHECK-LABEL: @alloca_3(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i32
@@ -240,10 +244,12 @@ bb5:
   ret void
 }
 
+; The store in the entry block can be eliminated, because it is overwritten
+; before the use in bb1 and not read on other paths to the function exit. The
+; object cannot be accessed by the caller.
 define void @alloca_4(i1 %c1) {
 ; CHECK-LABEL: @alloca_4(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, i32* [[P]]
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    store i32 0, i32* [[P]]

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
index 060cb8c17bdf..b8913d0b2da9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
@@ -175,7 +175,6 @@ bb3:
 define void @test11() {
 ; CHECK-LABEL: @test11(
 ; CHECK-NEXT:    [[P:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 0, i32* [[P]]
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    store i32 0, i32* [[P]]


        


More information about the llvm-commits mailing list