[llvm] [DSE] Consider all dominating conditions in `dominatingConditionImpliesValue` (PR #181709)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 09:40:00 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Antonio Frighetto (antoniofrighetto)
<details>
<summary>Changes</summary>
While optimizing tautological assignments, if there exists a dominating condition that implies the value being stored through a pointer, and that condition appears either in the store's immediate dominator or in any node that strictly dominates the store, then subsequent stores of that value may be redundant.
---
Full diff: https://github.com/llvm/llvm-project/pull/181709.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+57-39)
- (modified) llvm/test/Transforms/DeadStoreElimination/noop-stores.ll (+142-12)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e056f0c1f6390..42e2dbbd9129f 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2140,51 +2140,69 @@ struct DSEState {
return true;
}
- // Check if there is a dominating condition, that implies that the value
- // being stored in a ptr is already present in the ptr.
- bool dominatingConditionImpliesValue(MemoryDef *Def) {
- auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
- BasicBlock *StoreBB = StoreI->getParent();
- Value *StorePtr = StoreI->getPointerOperand();
- Value *StoreVal = StoreI->getValueOperand();
-
- DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
- if (!IDom)
- return false;
+ // If there is a dominating condition that implies the value being stored in a
+ // pointer, and such a condition appears either in its idom or in a node that
+ // strictly dominates the store, then the store may be redundant as long as
+ // no write occurs in between.
+ bool dominatingConditionImpliesValue(StoreInst *SI, MemoryDef *Def) {
+ BasicBlock *EntryBB = &SI->getFunction()->getEntryBlock();
+ BasicBlock *StoreBB = SI->getParent();
+
+ static constexpr unsigned Limit = 4;
+ SmallVector<BasicBlock *, 4> DomChain;
+ BasicBlock *Node = StoreBB;
+ // Walk up the dominator tree until the entry block is found, up to limit.
+ for (unsigned Depth = 0; Depth < Limit; ++Depth) {
+ DomTreeNode *IDomNode = DT.getNode(Node)->getIDom();
+ if (!IDomNode)
+ break;
+ Node = IDomNode->getBlock();
+ DomChain.emplace_back(Node);
+ if (Node == EntryBB)
+ break;
+ }
- auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
- if (!BI || !BI->isConditional())
- return false;
+ Value *StorePtr = SI->getPointerOperand();
+ Value *StoreVal = SI->getValueOperand();
+ SmallVector<std::pair<BasicBlock *, Instruction *>, 4> VisitConditions;
+ for (BasicBlock *DomBB : DomChain) {
+ auto *BI = dyn_cast<BranchInst>(DomBB->getTerminator());
+ if (!BI || !BI->isConditional())
+ continue;
- // In case both blocks are the same, it is not possible to determine
- // if optimization is possible. (We would not want to optimize a store
- // in the FalseBB if condition is true and vice versa.)
- if (BI->getSuccessor(0) == BI->getSuccessor(1))
- return false;
+ // In case both blocks are the same, it is not possible to determine
+ // if optimization is possible. (We would not want to optimize a store
+ // in the FalseBB if condition is true and vice versa.)
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ continue;
- Instruction *ICmpL;
- CmpPredicate Pred;
- if (!match(BI->getCondition(),
- m_c_ICmp(Pred,
- m_CombineAnd(m_Load(m_Specific(StorePtr)),
- m_Instruction(ICmpL)),
- m_Specific(StoreVal))) ||
- !ICmpInst::isEquality(Pred))
- return false;
+ Instruction *ICmpL;
+ CmpPredicate Pred;
+ if (!match(BI->getCondition(),
+ m_c_ICmp(Pred,
+ m_CombineAnd(m_Load(m_Specific(StorePtr)),
+ m_Instruction(ICmpL)),
+ m_Specific(StoreVal))) ||
+ !ICmpInst::isEquality(Pred))
+ continue;
- // In case the else blocks also branches to the if block or the other way
- // around it is not possible to determine if the optimization is possible.
- if (Pred == ICmpInst::ICMP_EQ &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
- StoreBB))
- return false;
+ unsigned ImpliedSucc = (Pred == ICmpInst::ICMP_EQ) ? 0 : 1;
+ if (!DT.dominates(BasicBlockEdge(DomBB, BI->getSuccessor(ImpliedSucc)),
+ StoreBB))
+ continue;
+
+ // Found a dominating condition.
+ VisitConditions.emplace_back(DomBB, ICmpL);
+ break;
+ }
- if (Pred == ICmpInst::ICMP_NE &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
- StoreBB))
+ if (VisitConditions.empty())
return false;
- MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
+ // Make sure there does not exist any clobbering access between the load and
+ // the potential redundant store.
+ const auto &[_, LI] = VisitConditions[0];
+ MemoryAccess *LoadAcc = MSSA.getMemoryAccess(LI);
MemoryAccess *ClobAcc =
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
@@ -2221,7 +2239,7 @@ struct DSEState {
if (!Store)
return false;
- if (dominatingConditionImpliesValue(Def))
+ if (dominatingConditionImpliesValue(Store, Def))
return true;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 283935d60a6da..bafae1070eb86 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -325,7 +325,7 @@ define ptr @zero_memset_after_malloc(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11:[0-9]+]]
; CHECK-NEXT: call void @clobber_memory(ptr [[CALL]])
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -339,7 +339,7 @@ define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -376,9 +376,10 @@ define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
; This should create a customalloc_zeroed call and eliminate the memset
define ptr @customalloc_memset(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED]]
+;
%call = call ptr @customalloc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -390,9 +391,10 @@ declare ptr @customalloc_zeroed(i64, i64) allockind("alloc,zeroed") "alloc-famil
; This should create a customalloc_zeroed_custom_cc call and eliminate the memset while
; respecting the custom calling convention of the zeroed variant.
define cc99 ptr @customalloc_memset_custom_cc(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset_custom_cc
-; CHECK-NEXT: [[CALL:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset_custom_cc(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED_CUSTOM_CC:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED_CUSTOM_CC]]
+;
%call = call cc99 ptr @customalloc_custom_cc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -482,7 +484,7 @@ cleanup:
define ptr @malloc_with_no_nointer_null_check(i64 %0, i32 %1) {
; CHECK-LABEL: @malloc_with_no_nointer_null_check(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[A:%.*]] = and i32 [[TMP1:%.*]], 32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -507,7 +509,7 @@ cleanup:
; PR50143
define ptr @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 1, i64 10) inaccessiblememonly
@@ -600,7 +602,7 @@ define ptr @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64
define ptr @zero_memset_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 10000, i64 4) inaccessiblememonly
@@ -696,7 +698,7 @@ if.end:
define ptr @readnone_malloc() {
; CHECK-LABEL: @readnone_malloc(
-; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR12:[0-9]+]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[ALLOC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret ptr [[ALLOC]]
;
@@ -1179,3 +1181,131 @@ if.else:
end:
ret void
}
+
+; There exists a dominating condition in the entry block, not the immediate
+; dominator for `inner` block, the edge entry->if.eq always dominates the store,
+; no clobber in between, the store is redundant.
+define void @remove_tautological_store_block_not_idom(ptr %x, i1 %c) {
+; CHECK-LABEL: @remove_tautological_store_block_not_idom(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK: then:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.eq:
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %val = load i32, ptr %x, align 4
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %then, label %end
+
+then:
+ br i1 %c, label %if.eq, label %if.else
+
+if.eq:
+ br label %join
+
+if.else:
+ br label %join
+
+join:
+ br label %inner
+
+inner:
+ store i32 0, ptr %x, align 4
+ br label %end
+
+end:
+ ret void
+}
+
+; There exists a dominating condition in the entry block, however,
+; the edge entry->if.eq does not dominate the store.
+define void @remove_tautological_store_not_idom_no_edge_domination(ptr %x) {
+; CHECK-LABEL: @remove_tautological_store_not_idom_no_edge_domination(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.eq:
+; CHECK-NEXT: br label [[JOIN:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: br label [[JOIN]]
+; CHECK: join:
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: store i32 0, ptr [[X]], align 4
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %val = load i32, ptr %x, align 4
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %if.eq, label %if.else
+
+if.eq:
+ br label %join
+
+if.else:
+ br label %join
+
+join:
+ br label %inner
+
+inner:
+ store i32 0, ptr %x, align 4
+ br label %end
+
+end:
+ ret void
+}
+
+; There exists a dominating condition in the entry block, however,
+; the pointer whose value is implied is clobbered in between.
+define void @remove_tautological_store_block_not_idom_clobber_between(ptr %x, i1 %c) {
+; CHECK-LABEL: @remove_tautological_store_block_not_idom_clobber_between(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]]
+; CHECK: if.eq:
+; CHECK-NEXT: br label [[NEXT:%.*]]
+; CHECK: next:
+; CHECK-NEXT: call void @unkown_write(ptr [[X]])
+; CHECK-NEXT: br i1 [[C:%.*]], label [[INNER:%.*]], label [[END]]
+; CHECK: inner:
+; CHECK-NEXT: store i32 0, ptr [[X]], align 4
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %val = load i32, ptr %x, align 4
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %if.eq, label %end
+
+if.eq:
+ br label %next
+
+next:
+ call void @unkown_write(ptr %x)
+ br i1 %c, label %inner, label %end
+
+inner:
+ store i32 0, ptr %x, align 4
+ br label %end
+
+end:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/181709
More information about the llvm-commits
mailing list