[llvm] [DeadStoreElimination] Handle dominating conditions established by all predecessors (PR #181615)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 01:05:14 PST 2026
https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/181615
While optimizing tautological assignments, if there exists a dominating condition that implies the value being stored in a pointer, and such a condition appears either in its idom or in all of its predecessors, then subsequent stores may be redundant, if no write to such a pointer occurs in between.
Fixes: https://github.com/llvm/llvm-project/issues/86920.
>From 6f49b56af55b809ee9deedb4ab708d9bf3c02f98 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 12 Feb 2026 16:48:49 +0100
Subject: [PATCH 1/2] [DeadStoreElimination] Precommit tests (NFC)
---
.../DeadStoreElimination/noop-stores.ll | 174 ++++++++++++++++--
1 file changed, 162 insertions(+), 12 deletions(-)
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 283935d60a6da..708c5caf20ed3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -325,7 +325,7 @@ define ptr @zero_memset_after_malloc(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11:[0-9]+]]
; CHECK-NEXT: call void @clobber_memory(ptr [[CALL]])
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -339,7 +339,7 @@ define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -376,9 +376,10 @@ define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
; This should create a customalloc_zeroed call and eliminate the memset
define ptr @customalloc_memset(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED]]
+;
%call = call ptr @customalloc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -390,9 +391,10 @@ declare ptr @customalloc_zeroed(i64, i64) allockind("alloc,zeroed") "alloc-famil
; This should create a customalloc_zeroed_custom_cc call and eliminate the memset while
; respecting the custom calling convention of the zeroed variant.
define cc99 ptr @customalloc_memset_custom_cc(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset_custom_cc
-; CHECK-NEXT: [[CALL:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset_custom_cc(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED_CUSTOM_CC:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED_CUSTOM_CC]]
+;
%call = call cc99 ptr @customalloc_custom_cc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -482,7 +484,7 @@ cleanup:
define ptr @malloc_with_no_nointer_null_check(i64 %0, i32 %1) {
; CHECK-LABEL: @malloc_with_no_nointer_null_check(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[A:%.*]] = and i32 [[TMP1:%.*]], 32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -507,7 +509,7 @@ cleanup:
; PR50143
define ptr @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 1, i64 10) inaccessiblememonly
@@ -600,7 +602,7 @@ define ptr @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64
define ptr @zero_memset_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 10000, i64 4) inaccessiblememonly
@@ -696,7 +698,7 @@ if.end:
define ptr @readnone_malloc() {
; CHECK-LABEL: @readnone_malloc(
-; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR12:[0-9]+]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[ALLOC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret ptr [[ALLOC]]
;
@@ -1179,3 +1181,151 @@ if.else:
end:
ret void
}
+
+; Multiple predecessors w/ dom conditions implying the stored value.
+define void @remove_tautological_store_multi_preds(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: call void @unkown_write(ptr null)
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ call void @unkown_write(ptr null)
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Negative tests.
+
+; Store in between, %p being clobbered.
+define void @remove_tautological_store_multi_preds_clobbering_between(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_clobbering_between(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: store i32 1, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ store i32 1, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Not a MemoryPhi as defining access for the store, %p being clobbered.
+define void @remove_tautological_store_multi_preds_clobbering_between_2(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_clobbering_between_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: call void @unkown_write(ptr [[P]])
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ call void @unkown_write(ptr %p)
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Different implying successor via icmp ne.
+define void @remove_tautological_store_multi_preds_cond_mixed(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_cond_mixed(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp ne i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
>From ffe062564883a73802dfe9d2ff261d2201473de0 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 12 Feb 2026 17:00:09 +0100
Subject: [PATCH 2/2] [DeadStoreElimination] Handle dominating conditions
established by all predecessors
While optimizing tautological assignments, if there exists a dominating
condition that implies the value being stored in a pointer, and such a
condition appears either in its idom or in all of its predecessors,
then subsequent stores may be redundant, if no write to such a pointer
occurs in between.
Fixes: https://github.com/llvm/llvm-project/issues/86920.
---
.../Scalar/DeadStoreElimination.cpp | 109 +++++++++++-------
.../DeadStoreElimination/noop-stores.ll | 1 -
2 files changed, 68 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e056f0c1f6390..be9efdae32bec 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2140,55 +2140,82 @@ struct DSEState {
return true;
}
- // Check if there is a dominating condition, that implies that the value
- // being stored in a ptr is already present in the ptr.
- bool dominatingConditionImpliesValue(MemoryDef *Def) {
- auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
- BasicBlock *StoreBB = StoreI->getParent();
- Value *StorePtr = StoreI->getPointerOperand();
- Value *StoreVal = StoreI->getValueOperand();
-
- DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
- if (!IDom)
+ // If there is a dominating condition that implies the value being stored in a
+ // pointer, and such a condition appears either in its immediate dominator or
+ // in all of its predecessors, then the store may be redundant.
+ bool dominatingConditionImpliesValue(StoreInst *SI, MemoryDef *Def) {
+ BasicBlock *StoreBB = SI->getParent();
+ Value *StorePtr = SI->getPointerOperand();
+ Value *StoreVal = SI->getValueOperand();
+
+ unsigned NumPreds = pred_size(StoreBB);
+ if (!NumPreds || NumPreds > 4)
return false;
- auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
- if (!BI || !BI->isConditional())
- return false;
+ // Collect dominating conditions.
+ // TODO: May be possible to generalize this to perform a reverse DFS
+ // instead.
+ SmallDenseMap<BasicBlock *, Instruction *, 4> PredToLoad;
+ for (BasicBlock *PredBB : predecessors(StoreBB)) {
+ auto *BI = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
- // In case both blocks are the same, it is not possible to determine
- // if optimization is possible. (We would not want to optimize a store
- // in the FalseBB if condition is true and vice versa.)
- if (BI->getSuccessor(0) == BI->getSuccessor(1))
- return false;
+ // In case both blocks are the same, it is not possible to determine
+ // if optimization is possible. (We would not want to optimize a store
+ // in the FalseBB if condition is true and vice versa.)
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return false;
- Instruction *ICmpL;
- CmpPredicate Pred;
- if (!match(BI->getCondition(),
- m_c_ICmp(Pred,
- m_CombineAnd(m_Load(m_Specific(StorePtr)),
- m_Instruction(ICmpL)),
- m_Specific(StoreVal))) ||
- !ICmpInst::isEquality(Pred))
- return false;
+ Instruction *ICmpL;
+ CmpPredicate Pred;
+ if (!match(BI->getCondition(),
+ m_c_ICmp(Pred,
+ m_CombineAnd(m_Load(m_Specific(StorePtr)),
+ m_Instruction(ICmpL)),
+ m_Specific(StoreVal))) ||
+ !ICmpInst::isEquality(Pred))
+ return false;
- // In case the else blocks also branches to the if block or the other way
- // around it is not possible to determine if the optimization is possible.
- if (Pred == ICmpInst::ICMP_EQ &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
- StoreBB))
- return false;
+ BasicBlock *ImpliedSucc =
+ Pred == ICmpInst::ICMP_EQ ? BI->getSuccessor(0) : BI->getSuccessor(1);
+ if (ImpliedSucc != StoreBB)
+ return false;
- if (Pred == ICmpInst::ICMP_NE &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
- StoreBB))
+ PredToLoad[PredBB] = ICmpL;
+ }
+ assert(PredToLoad.size() == NumPreds);
+
+ MemoryAccess *DefiningAccess = Def->getDefiningAccess();
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+
+ // Make sure there does not exist any clobbering access between the load and
+ // the potential redundant store.
+ auto IsLoadClobbered = [&](MemoryAccess *IncomingAcc, Instruction *LI) {
+ MemoryAccess *LoadAccess = MSSA.getMemoryAccess(LI);
+ MemoryAccess *ClobberingAccess =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(IncomingAcc,
+ StoreLoc);
+ return !MSSA.dominates(ClobberingAccess, LoadAccess);
+ };
+
+ if (NumPreds == 1)
+ if (!IsLoadClobbered(DefiningAccess,
+ PredToLoad[StoreBB->getSinglePredecessor()]))
+ return true;
+
+ // If we are not merging the memory reads from the predecessors, the memory
+ // location may be clobbered.
+ auto *MPhi = dyn_cast<MemoryPhi>(DefiningAccess);
+ if (!MPhi || MPhi->getBlock() != StoreBB)
return false;
- MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
- MemoryAccess *ClobAcc =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
+ for (unsigned I = 0; I < MPhi->getNumIncomingValues(); ++I)
+ if (IsLoadClobbered(MPhi->getIncomingValue(I),
+ PredToLoad[MPhi->getIncomingBlock(I)]))
+ return false;
- return MSSA.dominates(ClobAcc, LoadAcc);
+ return true;
}
/// \returns true if \p Def is a no-op store, either because it
@@ -2221,7 +2248,7 @@ struct DSEState {
if (!Store)
return false;
- if (dominatingConditionImpliesValue(Def))
+ if (dominatingConditionImpliesValue(Store, Def))
return true;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 708c5caf20ed3..76a34b25a906d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -1195,7 +1195,6 @@ define void @remove_tautological_store_multi_preds(ptr %p) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
; CHECK: then:
-; CHECK-NEXT: store i32 0, ptr [[P]], align 4
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list