[llvm-branch-commits] [llvm] release/18.x: [DSE] Delay deleting non-memory-defs until end of DSE. (#83411) (PR #84227)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 6 12:18:02 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: None (llvmbot)
Backport 10f5e983a9e3162a569cbebeb32168716e391340
Requested by: @fhahn
---
Full diff: https://github.com/llvm/llvm-project/pull/84227.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+26-5)
- (added) llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll (+189)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 11a91bfbe5baff..340fba4fb9c5a2 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -857,6 +857,9 @@ struct DSEState {
// no longer be captured.
bool ShouldIterateEndOfFunctionDSE;
+ /// Dead instructions to be removed at the end of DSE.
+ SmallVector<Instruction *> ToRemove;
+
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
@@ -1692,7 +1695,8 @@ struct DSEState {
return {MaybeDeadAccess};
}
- // Delete dead memory defs
+ /// Delete dead memory defs and recursively add their operands to ToRemove if
+ /// they became dead.
void deleteDeadInstruction(Instruction *SI) {
MemorySSAUpdater Updater(&MSSA);
SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1708,8 +1712,11 @@ struct DSEState {
salvageKnowledge(DeadInst);
// Remove the Instruction from MSSA.
- if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
- if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
+ MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
+ bool IsMemDef = MA && isa<MemoryDef>(MA);
+ if (MA) {
+ if (IsMemDef) {
+ auto *MD = cast<MemoryDef>(MA);
SkipStores.insert(MD);
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1730,13 +1737,21 @@ struct DSEState {
// Remove its operands
for (Use &O : DeadInst->operands())
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
- O = nullptr;
+ O.set(PoisonValue::get(O->getType()));
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
EI.removeInstruction(DeadInst);
- DeadInst->eraseFromParent();
+ // Remove memory defs directly if they don't produce results, but only
+ // queue other dead instructions for later removal. They may have been
+ // used as memory locations that have been cached by BatchAA. Removing
+ // them here may lead to newly created instructions being allocated at the
+ // same address, yielding stale cache entries.
+ if (IsMemDef && DeadInst->getType()->isVoidTy())
+ DeadInst->eraseFromParent();
+ else
+ ToRemove.push_back(DeadInst);
}
}
@@ -2233,6 +2248,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
+
+ while (!State.ToRemove.empty()) {
+ Instruction *DeadInst = State.ToRemove.pop_back_val();
+ DeadInst->eraseFromParent();
+ }
+
return MadeChange;
}
} // end anonymous namespace
diff --git a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll
new file mode 100644
index 00000000000000..ee9bd6912e2ae4
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=dse < %s | FileCheck %s
+;
+; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill
+; `call void @llvm.memset.p0.i64(...)` because it has a clobber read:
+; `%ret = load ptr, ptr %struct.byte.8`
+
+
+%struct.type = type { ptr, ptr }
+
+define ptr @foo(ptr noundef %ptr) {
+; CHECK-LABEL: define ptr @foo(
+; CHECK-SAME: ptr noundef [[PTR:%.*]]) {
+; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: ret ptr [[RET]]
+;
+ %struct.alloca = alloca %struct.type, align 8
+ call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+ ; Set %struct.alloca[8, 16) to 42.
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+ ; Set %struct.alloca[8, 12) to 43.
+ store i32 43, ptr %struct.byte.8, align 4
+ ; Set %struct.alloca[4, 8) to 44.
+ %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+ store i32 44, ptr %struct.byte.4, align 4
+ ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8
+ call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ ret ptr %ret
+}
+
+; Set of tests based on @foo, but where the memset's operands cannot be erased
+; due to other uses. Instead, they contain a number of removable MemoryDefs
+; with non-void result types.
+
+define ptr @foo_with_removable_malloc() {
+; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
+; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: ret ptr [[RET]]
+;
+ %struct.alloca = alloca %struct.type, align 8
+ call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+ %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+
+  ; Set of removable memory defs
+ %m2 = tail call ptr @malloc(i64 4)
+ %m1 = tail call ptr @malloc(i64 4)
+ store i32 0, ptr %struct.byte.8
+ store i32 0, ptr %struct.byte.8
+ store i32 123, ptr %m1
+ store i32 123, ptr %m2
+
+ ; Set %struct.alloca[8, 16) to 42.
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+ ; Set %struct.alloca[8, 12) to 43.
+ store i32 43, ptr %struct.byte.8, align 4
+ ; Set %struct.alloca[4, 8) to 44.
+ store i32 44, ptr %struct.byte.4, align 4
+ ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8
+ call void @readnone(ptr %struct.byte.4);
+ call void @readnone(ptr %struct.byte.8);
+ call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ ret ptr %ret
+}
+
+define ptr @foo_with_removable_malloc_free() {
+; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
+; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4)
+; CHECK-NEXT: call void @free(ptr [[M1]])
+; CHECK-NEXT: call void @free(ptr [[M2]])
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: ret ptr [[RET]]
+;
+ %struct.alloca = alloca %struct.type, align 8
+ %m1 = tail call ptr @malloc(i64 4)
+ call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+ %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+
+ store i32 0, ptr %struct.byte.4
+ store i32 0, ptr %struct.byte.8
+ %m2 = tail call ptr @malloc(i64 4)
+ store i32 123, ptr %m1
+ call void @free(ptr %m1);
+ store i32 123, ptr %m2
+ call void @free(ptr %m2);
+
+ ; Set %struct.alloca[8, 16) to 42.
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+ ; Set %struct.alloca[8, 12) to 43.
+ store i32 43, ptr %struct.byte.8, align 4
+ ; Set %struct.alloca[4, 8) to 44.
+ store i32 44, ptr %struct.byte.4, align 4
+ ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8
+ call void @readnone(ptr %struct.byte.4);
+ call void @readnone(ptr %struct.byte.8);
+ call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ ret ptr %ret
+}
+
+define ptr @foo_with_malloc_to_calloc() {
+; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
+; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
+; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
+; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
+; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
+; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
+; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
+; CHECK-NEXT: call void @use(ptr [[CALLOC1]])
+; CHECK-NEXT: call void @use(ptr [[CALLOC]])
+; CHECK-NEXT: ret ptr [[RET]]
+;
+ %struct.alloca = alloca %struct.type, align 8
+ call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
+ %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
+
+  ; Set of removable memory defs
+ %m1 = tail call ptr @malloc(i64 4)
+ %m2 = tail call ptr @malloc(i64 4)
+ store i32 0, ptr %struct.byte.4
+ store i32 0, ptr %struct.byte.8
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false)
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false)
+
+ ; Set %struct.alloca[8, 16) to 42.
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
+ ; Set %struct.alloca[8, 12) to 43.
+ store i32 43, ptr %struct.byte.8, align 4
+ ; Set %struct.alloca[4, 8) to 44.
+ store i32 44, ptr %struct.byte.4, align 4
+ ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8
+ call void @readnone(ptr %struct.byte.4);
+ call void @readnone(ptr %struct.byte.8);
+ call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
+ call void @use(ptr %m1)
+ call void @use(ptr %m2)
+ ret ptr %ret
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
+declare void @readnone(ptr) readnone nounwind
+declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
+
+declare void @use(ptr)
``````````
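
For context on the new comment in `deleteDeadInstruction`: BatchAA caches alias-query results keyed on pointer values, so erasing a dead instruction right away and later allocating a new instruction at the recycled address can make a stale cache entry appear to describe the new instruction. The standalone C++ sketch below is not LLVM code; `Inst`, `AliasCache`, and the local `ToRemove` are hypothetical stand-ins used only to illustrate the hazard and the deferred-deletion pattern the patch adopts.

```cpp
// Minimal illustration (not the DSE implementation) of why value-producing
// dead instructions are queued for later removal instead of erased eagerly.
#include <cstdio>
#include <memory>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for an IR instruction.
struct Inst {
  int Id;
};

int main() {
  // Pointer-keyed cache, analogous to BatchAA caching results for memory
  // locations that wrap Value pointers.
  std::unordered_map<const Inst *, bool> AliasCache;

  // Cache a result involving an instruction that later turns out to be dead.
  Inst *Dead = new Inst{1};
  AliasCache[Dead] = true;

  // Hazard: erase the dead instruction immediately and create a new one.
  // The allocator may reuse the freed storage, so the entry keyed on the old
  // address can now "match" an unrelated, newly created instruction.
  delete Dead;
  Inst *Fresh = new Inst{2};
  if (AliasCache.count(Fresh))
    std::puts("stale cache entry applies to the new instruction");

  // Deferred deletion, as in the patch: queue dead instructions instead of
  // freeing them, so their addresses cannot be recycled while the cached
  // analysis is still consulted; release them only once the pass is done.
  std::vector<std::unique_ptr<Inst>> ToRemove;
  Inst *Dead2 = new Inst{3};
  AliasCache[Dead2] = true;
  ToRemove.emplace_back(Dead2); // still allocated, so no address reuse
  // ... remaining queries against AliasCache would run here ...
  AliasCache.clear(); // cache retired first
  ToRemove.clear();   // now it is safe to free the queued instructions

  delete Fresh;
  return 0;
}
```

As the patch comment notes, memory defs with void result types are still erased eagerly: producing no value, they cannot have been used as the pointer operand of a memory location cached by BatchAA.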
https://github.com/llvm/llvm-project/pull/84227
More information about the llvm-branch-commits mailing list