[llvm-branch-commits] [llvm] release/18.x: [DSE] Delay deleting non-memory-defs until end of DSE. (#83411) (PR #84227)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 6 12:18:02 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport 10f5e983a9e3162a569cbebeb32168716e391340

Requested by: @<!-- -->fhahn

---
Full diff: https://github.com/llvm/llvm-project/pull/84227.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+26-5) 
- (added) llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll (+189) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 11a91bfbe5baff..340fba4fb9c5a2 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -857,6 +857,9 @@ struct DSEState {
   // no longer be captured.
   bool ShouldIterateEndOfFunctionDSE;
 
+  /// Dead instructions to be removed at the end of DSE.
+  SmallVector<Instruction *> ToRemove;
+
   // Class contains self-reference, make sure it's not copied/moved.
   DSEState(const DSEState &) = delete;
   DSEState &operator=(const DSEState &) = delete;
@@ -1692,7 +1695,8 @@ struct DSEState {
     return {MaybeDeadAccess};
   }
 
-  // Delete dead memory defs
+  /// Delete dead memory defs and recursively add their operands to ToRemove if
+  /// they became dead.
   void deleteDeadInstruction(Instruction *SI) {
     MemorySSAUpdater Updater(&MSSA);
     SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1708,8 +1712,11 @@ struct DSEState {
       salvageKnowledge(DeadInst);
 
       // Remove the Instruction from MSSA.
-      if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
-        if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
+      MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
+      bool IsMemDef = MA && isa<MemoryDef>(MA);
+      if (MA) {
+        if (IsMemDef) {
+          auto *MD = cast<MemoryDef>(MA);
           SkipStores.insert(MD);
           if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
             if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1730,13 +1737,21 @@ struct DSEState {
       // Remove its operands
       for (Use &O : DeadInst->operands())
         if (Instruction *OpI = dyn_cast<Instruction>(O)) {
-          O = nullptr;
+          O.set(PoisonValue::get(O->getType()));
           if (isInstructionTriviallyDead(OpI, &TLI))
             NowDeadInsts.push_back(OpI);
         }
 
       EI.removeInstruction(DeadInst);
-      DeadInst->eraseFromParent();
+      // Remove memory defs directly if they don't produce results, but only
+      // queue other dead instructions for later removal. They may have been
+      // used as memory locations that have been cached by BatchAA. Removing
+      // them here may lead to newly created instructions being allocated at the
+      // same address, yielding stale cache entries.
+      if (IsMemDef && DeadInst->getType()->isVoidTy())
+        DeadInst->eraseFromParent();
+      else
+        ToRemove.push_back(DeadInst);
     }
   }
 
@@ -2233,6 +2248,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 
   MadeChange |= State.eliminateRedundantStoresOfExistingValues();
   MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
+
+  while (!State.ToRemove.empty()) {
+    Instruction *DeadInst = State.ToRemove.pop_back_val();
+    DeadInst->eraseFromParent();
+  }
+
   return MadeChange;
 }
 } // end anonymous namespace
diff --git a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll
new file mode 100644
index 00000000000000..ee9bd6912e2ae4
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=dse < %s | FileCheck %s
+;
+; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill
+; `call void @llvm.memset.p0.i64(...)` because part of what it writes is read by:
+; `%ret = load ptr, ptr %struct.byte.8`
+
+
+%struct.type = type { ptr, ptr }
+
+define ptr @foo(ptr noundef %ptr) {
+; CHECK-LABEL: define ptr @foo(
+; CHECK-SAME: ptr noundef [[PTR:%.*]]) {
+; CHECK-NEXT:    [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT:    store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    ret ptr [[RET]]
+;
+  %struct.alloca = alloca %struct.type, align 8
+  call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+  ; Set %struct.alloca[8, 16) to 42.
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+  ; Set %struct.alloca[8, 12) to 43.
+  store i32 43, ptr %struct.byte.8, align 4
+  ; Set %struct.alloca[4, 8) to 44.
+  %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+  store i32 44, ptr %struct.byte.4, align 4
+  ; Return %struct.alloca[8, 16).
+  %ret = load ptr, ptr %struct.byte.8
+  call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  ret ptr %ret
+}
+
+; Set of tests based on @foo, but where the memset's operands cannot be erased
+; due to other uses. Instead, they contain a number of removable MemoryDefs
+; with non-void result types.
+
+define ptr @foo_with_removable_malloc() {
+; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
+; CHECK-NEXT:    [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT:    [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT:    store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    ret ptr [[RET]]
+;
+  %struct.alloca = alloca %struct.type, align 8
+  call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+  %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+
+  ; Set of removable memory defs
+  %m2 = tail call ptr @malloc(i64 4)
+  %m1 = tail call ptr @malloc(i64 4)
+  store i32 0, ptr %struct.byte.8
+  store i32 0, ptr %struct.byte.8
+  store i32 123, ptr %m1
+  store i32 123, ptr %m2
+
+  ; Set %struct.alloca[8, 16) to 42.
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+  ; Set %struct.alloca[8, 12) to 43.
+  store i32 43, ptr %struct.byte.8, align 4
+  ; Set %struct.alloca[4, 8) to 44.
+  store i32 44, ptr %struct.byte.4, align 4
+  ; Return %struct.alloca[8, 16).
+  %ret = load ptr, ptr %struct.byte.8
+  call void @readnone(ptr %struct.byte.4);
+  call void @readnone(ptr %struct.byte.8);
+  call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  ret ptr %ret
+}
+
+define ptr @foo_with_removable_malloc_free() {
+; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
+; CHECK-NEXT:    [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT:    [[M1:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT:    [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT:    [[M2:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT:    call void @free(ptr [[M1]])
+; CHECK-NEXT:    call void @free(ptr [[M2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT:    store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    ret ptr [[RET]]
+;
+  %struct.alloca = alloca %struct.type, align 8
+  %m1 = tail call ptr @malloc(i64 4)
+  call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+  %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+
+  store i32 0, ptr %struct.byte.4
+  store i32 0, ptr %struct.byte.8
+  %m2 = tail call ptr @malloc(i64 4)
+  store i32 123, ptr %m1
+  call void @free(ptr %m1);
+  store i32 123, ptr %m2
+  call void @free(ptr %m2);
+
+  ; Set %struct.alloca[8, 16) to 42.
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+  ; Set %struct.alloca[8, 12) to 43.
+  store i32 43, ptr %struct.byte.8, align 4
+  ; Set %struct.alloca[4, 8) to 44.
+  store i32 44, ptr %struct.byte.4, align 4
+  ; Return %struct.alloca[8, 16).
+  %ret = load ptr, ptr %struct.byte.8
+  call void @readnone(ptr %struct.byte.4);
+  call void @readnone(ptr %struct.byte.8);
+  call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  ret ptr %ret
+}
+
+define ptr @foo_with_malloc_to_calloc() {
+; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
+; CHECK-NEXT:    [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT:    [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT:    [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT:    [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT:    store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT:    call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @use(ptr [[CALLOC1]])
+; CHECK-NEXT:    call void @use(ptr [[CALLOC]])
+; CHECK-NEXT:    ret ptr [[RET]]
+;
+  %struct.alloca = alloca %struct.type, align 8
+  call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+  %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+
+  ; Set of removable memory defs
+  %m1 = tail call ptr @malloc(i64 4)
+  %m2 = tail call ptr @malloc(i64 4)
+  store i32 0, ptr %struct.byte.4
+  store i32 0, ptr %struct.byte.8
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false)
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false)
+
+  ; Set %struct.alloca[8, 16) to 42.
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+  ; Set %struct.alloca[8, 12) to 43.
+  store i32 43, ptr %struct.byte.8, align 4
+  ; Set %struct.alloca[4, 8) to 44.
+  store i32 44, ptr %struct.byte.4, align 4
+  ; Return %struct.alloca[8, 16).
+  %ret = load ptr, ptr %struct.byte.8
+  call void @readnone(ptr %struct.byte.4);
+  call void @readnone(ptr %struct.byte.8);
+  call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+  call void @use(ptr %m1)
+  call void @use(ptr %m2)
+  ret ptr %ret
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
+declare void @readnone(ptr) readnone nounwind
+declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
+
+declare void @use(ptr)

``````````

</details>


https://github.com/llvm/llvm-project/pull/84227


More information about the llvm-branch-commits mailing list