[llvm-branch-commits] [llvm] 1f11450 - [DSE] Use correct memory location for read clobber check

Nikita Popov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Dec 18 11:31:14 PST 2020


Author: Nikita Popov
Date: 2020-12-18T20:26:53+01:00
New Revision: 1f1145006b32533484c9ebc0f45e241a02fe6c8b

URL: https://github.com/llvm/llvm-project/commit/1f1145006b32533484c9ebc0f45e241a02fe6c8b
DIFF: https://github.com/llvm/llvm-project/commit/1f1145006b32533484c9ebc0f45e241a02fe6c8b.diff

LOG: [DSE] Use correct memory location for read clobber check

MSSA-based DSE starts at a killing store, finds an earlier store,
and then checks that the earlier store is not read along any path
(without being killed first). However, it performs that check using
the memory location of the killing store, not that of the earlier
store we're attempting to eliminate.
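
To make this concrete, here is a minimal hypothetical sketch (the
names are made up for illustration, not taken from the tests below).
The load sits on the path between the two stores, and the read
clobber check for it must query the earlier store's location %p:

    define void @sketch(i8* %p, i8* %q) {
      store i8 1, i8* %p    ; earlier store: the elimination candidate
      %v = load i8, i8* %q  ; read clobber check must use %p's location
      store i8 2, i8* %p    ; killing store: fully overwrites the earlier one
      ret void
    }

If %q may alias %p, the load can observe the earlier store, so DSE
must not remove it, regardless of what the killing store's location
and metadata suggest.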

This has a number of problems:

* Mismatches between what BasicAA considers aliasing and what DSE
  considers an overwrite (even though both are correct in isolation)
  can result in miscompiles. This is PR48279, which D92045 tries to
  fix in a different way. The root problem is that we're using a
  location from a store that is potentially not executed; such a
  store may be UB, in which case analysis results based on it can be
  arbitrary.
* Metadata on the killing store may be used to determine aliasing,
  but there is no guarantee that this metadata is valid, as the
  specific killing store may not be executed. Using the metadata on
  the earlier store is valid: it is the store we're removing, so on
  any execution where its removal may be observed, it must be
  executed. (A sketch of this issue follows the list.)
* The location is imprecise. For full overwrites the killing store
  will always have a location that is larger than or equal to the
  earlier access location, so it is beneficial to use the earlier
  access location. This is not the case for partial overwrites, where
  either location might be smaller. There is some room for
  improvement here.
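
As a hedged sketch of the metadata problem from the second point
(hypothetical names; compare the scoped-noalias.ll test updated
below), the !noalias metadata on the killing store is only meaningful
on the path where that store actually executes:

    define void @metadata_sketch(i1 %c, i8* %p1, i8* %p2) {
      store i8 0, i8* %p1                     ; earlier store
      %v = load i8, i8* %p2, !alias.scope !0  ; may read %p1 if %c is false
      br i1 %c, label %if, label %else
    if:
      store i8 1, i8* %p1, !noalias !0        ; killing store, conditional
      ret void
    else:
      store i8 2, i8* %p1
      ret void
    }

    !0 = !{!1}
    !1 = distinct !{!1, !2, !"scope"}
    !2 = distinct !{!2, !"domain"}

Querying the load against the killing store's location picks up the
!noalias metadata and concludes NoAlias, incorrectly eliminating the
first store; querying against the earlier store's location keeps it.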

Using the earlier access location means that we can no longer cache
which accesses are read for a given killing store, as we may now be
querying a different location for each elimination candidate.
However, it turns out that simply dropping the cache has no notable
impact on compile time.
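
As a sketch of why the cache is no longer sound (hypothetical names):
under one killing def, different elimination candidates now query
different locations, so a cached result for one candidate does not
transfer to another:

    declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)

    define void @cache_sketch(i8* %p) {
      %p4 = getelementptr i8, i8* %p, i64 4
      store i8 1, i8* %p     ; candidate 1 (byte 0)
      store i8 1, i8* %p4    ; candidate 2 (byte 4)
      %v = load i8, i8* %p4  ; read clobber for candidate 2 only
      call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 8, i1 false)
      ret void
    }

Recording the load as "known no read" while checking candidate 1 and
reusing that answer for candidate 2 would skip a real read clobber,
which is why the cache was simply dropped.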

Differential Revision: https://reviews.llvm.org/D93523

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6cc1b43c05e8..1a1d0c01f77e 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1634,18 +1634,6 @@ struct DSEState {
   /// basic block.
   DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
 
-  struct CheckCache {
-    SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
-    SmallPtrSet<MemoryAccess *, 16> KnownReads;
-
-    bool isKnownNoRead(MemoryAccess *A) const {
-      return KnownNoReads.find(A) != KnownNoReads.end();
-    }
-    bool isKnownRead(MemoryAccess *A) const {
-      return KnownReads.find(A) != KnownReads.end();
-    }
-  };
-
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
       : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
@@ -1940,9 +1928,8 @@ struct DSEState {
   Optional<MemoryAccess *>
   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                   const MemoryLocation &DefLoc, const Value *DefUO,
-                  CheckCache &Cache, unsigned &ScanLimit,
-                  unsigned &WalkerStepLimit, bool IsMemTerm,
-                  unsigned &PartialLimit) {
+                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
+                  bool IsMemTerm, unsigned &PartialLimit) {
     if (ScanLimit == 0 || WalkerStepLimit == 0) {
       LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
       return None;
@@ -1954,6 +1941,7 @@ struct DSEState {
     LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");
 
     // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
+    Optional<MemoryLocation> CurrentLoc;
     do {
       StepAgain = false;
       LLVM_DEBUG({
@@ -2017,12 +2005,8 @@ struct DSEState {
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) &&
-          (Cache.KnownReads.contains(Current) ||
-           isReadClobber(DefLoc, CurrentI))) {
-        Cache.KnownReads.insert(Current);
+      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
         return None;
-      }
 
       // Quick check if there are direct uses that are read-clobbers.
       if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
@@ -2031,7 +2015,6 @@ struct DSEState {
                      isReadClobber(DefLoc, UseOrDef->getMemoryInst());
             return false;
           })) {
-        Cache.KnownReads.insert(Current);
         LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
         return None;
       }
@@ -2045,13 +2028,24 @@ struct DSEState {
       }
 
       // If Current does not have an analyzable write location, skip it
-      auto CurrentLoc = getLocForWriteEx(CurrentI);
+      CurrentLoc = getLocForWriteEx(CurrentI);
       if (!CurrentLoc) {
         StepAgain = true;
         Current = CurrentDef->getDefiningAccess();
         continue;
       }
 
+      // AliasAnalysis does not account for loops. Limit elimination to
+      // candidates for which we can guarantee they always store to the same
+      // memory location and not multiple locations in a loop.
+      if (Current->getBlock() != KillingDef->getBlock() &&
+          !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
+        StepAgain = true;
+        Current = CurrentDef->getDefiningAccess();
+        WalkerStepLimit -= 1;
+        continue;
+      }
+
       if (IsMemTerm) {
         // If the killing def is a memory terminator (e.g. lifetime.end), check
         // the next candidate if the current Current does not write the same
@@ -2062,17 +2056,6 @@ struct DSEState {
         }
         continue;
       } else {
-        // AliasAnalysis does not account for loops. Limit elimination to
-        // candidates for which we can guarantee they always store to the same
-        // memory location and not multiple locations in a loop.
-        if (Current->getBlock() != KillingDef->getBlock() &&
-            !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
-          StepAgain = true;
-          Current = CurrentDef->getDefiningAccess();
-          WalkerStepLimit -= 1;
-          continue;
-        }
-
         int64_t InstWriteOffset, DepWriteOffset;
         auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
                               DepWriteOffset, InstWriteOffset, BatchAA, &F);
@@ -2133,18 +2116,6 @@ struct DSEState {
       }
       --ScanLimit;
       NumDomMemDefChecks++;
-
-      // Check if we already visited this access.
-      if (Cache.isKnownNoRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess
-                          << " is safe earlier.\n");
-        continue;
-      }
-      if (Cache.isKnownRead(UseAccess)) {
-        LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess
-                          << " has a read-clobber earlier.\n");
-        return None;
-      }
       KnownNoReads.insert(UseAccess);
 
       if (isa<MemoryPhi>(UseAccess)) {
@@ -2172,7 +2143,7 @@ struct DSEState {
 
       // A memory terminator kills all preceeding MemoryDefs and all succeeding
       // MemoryAccesses. We do not have to check it's users.
-      if (isMemTerminator(DefLoc, KillingI, UseInst)) {
+      if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
         LLVM_DEBUG(
             dbgs()
             << " ... skipping, memterminator invalidates following accesses\n");
@@ -2187,19 +2158,13 @@ struct DSEState {
 
       if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
         LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
         return None;
       }
 
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
-      if (isReadClobber(DefLoc, UseInst)) {
+      if (isReadClobber(*CurrentLoc, UseInst)) {
         LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
-        Cache.KnownReads.insert(UseAccess);
-        Cache.KnownReads.insert(StartAccess);
-        Cache.KnownReads.insert(EarlierAccess);
         return None;
       }
 
@@ -2223,7 +2188,7 @@ struct DSEState {
       //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
       //                  stores [0,1]
       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
-        if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) {
+        if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
           if (!isInvisibleToCallerAfterRet(DefUO) &&
               UseAccess != EarlierAccess) {
             BasicBlock *MaybeKillingBlock = UseInst->getParent();
@@ -2311,7 +2276,6 @@ struct DSEState {
 
     // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
     // potentially dead.
-    Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end());
     return {EarlierAccess};
   }
 
@@ -2550,7 +2514,6 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
 
     bool Shortend = false;
     bool IsMemTerm = State.isMemTerminatorInst(SI);
-    DSEState::CheckCache Cache;
     // Check if MemoryAccesses in the worklist are killed by KillingDef.
     for (unsigned I = 0; I < ToCheck.size(); I++) {
       Current = ToCheck[I];
@@ -2558,8 +2521,8 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
         continue;
 
       Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit,
-          WalkerStepLimit, IsMemTerm, PartialLimit);
+          KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
+          IsMemTerm, PartialLimit);
 
       if (!Next) {
         LLVM_DEBUG(dbgs() << "  finished walk\n");

diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
index b22f5b60d758..fe5087646184 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll
@@ -41,13 +41,13 @@ bb3:
 }
 
 ; Post-dominating store.
+; TODO: The memset can be shortened.
 define void @accessible_after_return_2(i32* noalias %P, i1 %c) {
 ; CHECK-LABEL: @accessible_after_return_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1

diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
index 3a16a62dddd4..925ab4e99597 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll
@@ -36,6 +36,8 @@ define i32 @test_out_of_bounds_store_nonlocal(i1 %c) {
 ; CHECK-NEXT:    [[D:%.*]] = alloca [1 x i32], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0
+; CHECK-NEXT:    store i32 10, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]]

diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
index 05326de07382..b4ed6cc2ea81 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll
@@ -67,13 +67,11 @@ end:
   ret void
 }
 
-; TODO: The store to %a0 is dead, because only %a1 is read later.
+; The store to %a0 is dead, because only %a1 is read later.
 define void @test3(i1 %c) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
 ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1
@@ -102,10 +100,8 @@ else:
 define void @test4(i1 %c) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i8], align 1
-; CHECK-NEXT:    [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0
 ; CHECK-NEXT:    [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1
 ; CHECK-NEXT:    store i8 1, i8* [[A1]], align 1
-; CHECK-NEXT:    store i8 1, i8* [[A0]], align 1
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
 ; CHECK-NEXT:    store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1

diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll
index b2e626250817..89870738a08e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll
@@ -4,13 +4,13 @@
 ; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias
 ; metadata is correct, but the first store cannot be eliminated, as it may be
 ; read-clobbered by the load.
-; TODO The store is incorrectly eliminated.
 define void @test(i1 %c, i8* %p1, i8* %p2) {
 ; CHECK-LABEL: @test(
+; CHECK-NEXT:    store i8 0, i8* [[P1:%.*]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    store i8 1, i8* [[P1:%.*]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 1, i8* [[P1]], align 1, !noalias !0
 ; CHECK-NEXT:    ret void
 ; CHECK:       else:
 ; CHECK-NEXT:    store i8 2, i8* [[P1]], align 1


        

