[llvm] [DSE] Consider all dominating conditions in `dominatingConditionImpliesValue` (PR #181709)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 16 09:40:00 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Antonio Frighetto (antoniofrighetto)

<details>
<summary>Changes</summary>

While optimizing tautological assignments, if there exists a dominating condition that implies the value being stored in a pointer, and such a condition appears either in the store's immediate dominator or in a node that strictly dominates the store, then subsequent stores may be redundant.

---
Full diff: https://github.com/llvm/llvm-project/pull/181709.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp (+57-39) 
- (modified) llvm/test/Transforms/DeadStoreElimination/noop-stores.ll (+142-12) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e056f0c1f6390..42e2dbbd9129f 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2140,51 +2140,69 @@ struct DSEState {
     return true;
   }
 
-  // Check if there is a dominating condition, that implies that the value
-  // being stored in a ptr is already present in the ptr.
-  bool dominatingConditionImpliesValue(MemoryDef *Def) {
-    auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
-    BasicBlock *StoreBB = StoreI->getParent();
-    Value *StorePtr = StoreI->getPointerOperand();
-    Value *StoreVal = StoreI->getValueOperand();
-
-    DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
-    if (!IDom)
-      return false;
+  // If there is a dominating condition that implies the value being stored in a
+  // pointer, and such a condition appears either in its idom or in a node that
+  // strictly dominates the store, then the store may be redundant as long as
+  // no write occurs in between.
+  bool dominatingConditionImpliesValue(StoreInst *SI, MemoryDef *Def) {
+    BasicBlock *EntryBB = &SI->getFunction()->getEntryBlock();
+    BasicBlock *StoreBB = SI->getParent();
+
+    static constexpr unsigned Limit = 4;
+    SmallVector<BasicBlock *, 4> DomChain;
+    BasicBlock *Node = StoreBB;
+    // Walk up the dominator tree until the entry block is found, up to limit.
+    for (unsigned Depth = 0; Depth < Limit; ++Depth) {
+      DomTreeNode *IDomNode = DT.getNode(Node)->getIDom();
+      if (!IDomNode)
+        break;
+      Node = IDomNode->getBlock();
+      DomChain.emplace_back(Node);
+      if (Node == EntryBB)
+        break;
+    }
 
-    auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
-    if (!BI || !BI->isConditional())
-      return false;
+    Value *StorePtr = SI->getPointerOperand();
+    Value *StoreVal = SI->getValueOperand();
+    SmallVector<std::pair<BasicBlock *, Instruction *>, 4> VisitConditions;
+    for (BasicBlock *DomBB : DomChain) {
+      auto *BI = dyn_cast<BranchInst>(DomBB->getTerminator());
+      if (!BI || !BI->isConditional())
+        continue;
 
-    // In case both blocks are the same, it is not possible to determine
-    // if optimization is possible. (We would not want to optimize a store
-    // in the FalseBB if condition is true and vice versa.)
-    if (BI->getSuccessor(0) == BI->getSuccessor(1))
-      return false;
+      // In case both blocks are the same, it is not possible to determine
+      // if optimization is possible. (We would not want to optimize a store
+      // in the FalseBB if condition is true and vice versa.)
+      if (BI->getSuccessor(0) == BI->getSuccessor(1))
+        continue;
 
-    Instruction *ICmpL;
-    CmpPredicate Pred;
-    if (!match(BI->getCondition(),
-               m_c_ICmp(Pred,
-                        m_CombineAnd(m_Load(m_Specific(StorePtr)),
-                                     m_Instruction(ICmpL)),
-                        m_Specific(StoreVal))) ||
-        !ICmpInst::isEquality(Pred))
-      return false;
+      Instruction *ICmpL;
+      CmpPredicate Pred;
+      if (!match(BI->getCondition(),
+                 m_c_ICmp(Pred,
+                          m_CombineAnd(m_Load(m_Specific(StorePtr)),
+                                       m_Instruction(ICmpL)),
+                          m_Specific(StoreVal))) ||
+          !ICmpInst::isEquality(Pred))
+        continue;
 
-    // In case the else blocks also branches to the if block or the other way
-    // around it is not possible to determine if the optimization is possible.
-    if (Pred == ICmpInst::ICMP_EQ &&
-        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
-                      StoreBB))
-      return false;
+      unsigned ImpliedSucc = (Pred == ICmpInst::ICMP_EQ) ? 0 : 1;
+      if (!DT.dominates(BasicBlockEdge(DomBB, BI->getSuccessor(ImpliedSucc)),
+                        StoreBB))
+        continue;
+
+      // Found a dominating condition.
+      VisitConditions.emplace_back(DomBB, ICmpL);
+      break;
+    }
 
-    if (Pred == ICmpInst::ICMP_NE &&
-        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
-                      StoreBB))
+    if (VisitConditions.empty())
       return false;
 
-    MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
+    // Make sure there does not exist any clobbering access between the load and
+    // the potential redundant store.
+    const auto &[_, LI] = VisitConditions[0];
+    MemoryAccess *LoadAcc = MSSA.getMemoryAccess(LI);
     MemoryAccess *ClobAcc =
         MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
 
@@ -2221,7 +2239,7 @@ struct DSEState {
     if (!Store)
       return false;
 
-    if (dominatingConditionImpliesValue(Def))
+    if (dominatingConditionImpliesValue(Store, Def))
       return true;
 
     if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 283935d60a6da..bafae1070eb86 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -325,7 +325,7 @@ define ptr @zero_memset_after_malloc(i64 %size) {
 ; based on pr25892_lite
 define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
 ; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
-; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11:[0-9]+]]
 ; CHECK-NEXT:    call void @clobber_memory(ptr [[CALL]])
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE]], i1 false)
 ; CHECK-NEXT:    ret ptr [[CALL]]
@@ -339,7 +339,7 @@ define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
 ; based on pr25892_lite
 define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
 ; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
-; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11]]
 ; CHECK-NEXT:    [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
 ; CHECK-NEXT:    ret ptr [[CALL]]
@@ -376,9 +376,10 @@ define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
 
 ; This should create a customalloc_zeroed call and eliminate the memset
 define ptr @customalloc_memset(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset
-; CHECK-NEXT:  [[CALL:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT:  ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset(
+; CHECK-NEXT:    [[CUSTOMALLOC_ZEROED:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT:    ret ptr [[CUSTOMALLOC_ZEROED]]
+;
   %call = call ptr @customalloc(i64 %size, i64 %align)
   call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
   ret ptr %call
@@ -390,9 +391,10 @@ declare ptr @customalloc_zeroed(i64, i64) allockind("alloc,zeroed") "alloc-famil
 ; This should create a customalloc_zeroed_custom_cc call and eliminate the memset while
 ; respecting the custom calling convention of the zeroed variant.
 define cc99 ptr @customalloc_memset_custom_cc(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset_custom_cc
-; CHECK-NEXT:  [[CALL:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT:  ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset_custom_cc(
+; CHECK-NEXT:    [[CUSTOMALLOC_ZEROED_CUSTOM_CC:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT:    ret ptr [[CUSTOMALLOC_ZEROED_CUSTOM_CC]]
+;
   %call = call cc99 ptr @customalloc_custom_cc(i64 %size, i64 %align)
   call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
   ret ptr %call
@@ -482,7 +484,7 @@ cleanup:
 define ptr @malloc_with_no_nointer_null_check(i64 %0, i32 %1) {
 ; CHECK-LABEL: @malloc_with_no_nointer_null_check(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR7]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR11]]
 ; CHECK-NEXT:    [[A:%.*]] = and i32 [[TMP1:%.*]], 32
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -507,7 +509,7 @@ cleanup:
 ; PR50143
 define ptr @store_zero_after_calloc_inaccessiblememonly() {
 ; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
-; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR7]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR11]]
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = tail call ptr @calloc(i64 1, i64 10)  inaccessiblememonly
@@ -600,7 +602,7 @@ define ptr @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64
 
 define ptr @zero_memset_after_calloc_inaccessiblememonly()  {
 ; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
-; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR7]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR11]]
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = tail call ptr @calloc(i64 10000, i64 4) inaccessiblememonly
@@ -696,7 +698,7 @@ if.end:
 
 define ptr @readnone_malloc() {
 ; CHECK-LABEL: @readnone_malloc(
-; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR12:[0-9]+]]
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[ALLOC]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    ret ptr [[ALLOC]]
 ;
@@ -1179,3 +1181,131 @@ if.else:
 end:
   ret void
 }
+
+; There exists a dominating condition in the entry block, not the immediate
+; dominator for `inner` block, the edge entry->if.eq always dominates the store,
+; no clobber in between, the store is redundant.
+define void @remove_tautological_store_block_not_idom(ptr %x, i1 %c) {
+; CHECK-LABEL: @remove_tautological_store_block_not_idom(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.eq:
+; CHECK-NEXT:    br label [[JOIN:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val = load i32, ptr %x, align 4
+  %cmp = icmp eq i32 %val, 0
+  br i1 %cmp, label %then, label %end
+
+then:
+  br i1 %c, label %if.eq, label %if.else
+
+if.eq:
+  br label %join
+
+if.else:
+  br label %join
+
+join:
+  br label %inner
+
+inner:
+  store i32 0, ptr %x, align 4
+  br label %end
+
+end:
+  ret void
+}
+
+; There exists a dominating condition in the entry block, however,
+; the edge entry->if.eq does not dominate the store.
+define void @remove_tautological_store_not_idom_no_edge_domination(ptr %x) {
+; CHECK-LABEL: @remove_tautological_store_not_idom_no_edge_domination(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_EQ:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.eq:
+; CHECK-NEXT:    br label [[JOIN:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    store i32 0, ptr [[X]], align 4
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val = load i32, ptr %x, align 4
+  %cmp = icmp eq i32 %val, 0
+  br i1 %cmp, label %if.eq, label %if.else
+
+if.eq:
+  br label %join
+
+if.else:
+  br label %join
+
+join:
+  br label %inner
+
+inner:
+  store i32 0, ptr %x, align 4
+  br label %end
+
+end:
+  ret void
+}
+
+; There exists a dominating condition in the entry block, however,
+; the pointer whose value is implied is clobbered in between.
+define void @remove_tautological_store_block_not_idom_clobber_between(ptr %x, i1 %c) {
+; CHECK-LABEL: @remove_tautological_store_block_not_idom_clobber_between(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X:%.*]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_EQ:%.*]], label [[END:%.*]]
+; CHECK:       if.eq:
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    call void @unkown_write(ptr [[X]])
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[INNER:%.*]], label [[END]]
+; CHECK:       inner:
+; CHECK-NEXT:    store i32 0, ptr [[X]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val = load i32, ptr %x, align 4
+  %cmp = icmp eq i32 %val, 0
+  br i1 %cmp, label %if.eq, label %end
+
+if.eq:
+  br label %next
+
+next:
+  call void @unkown_write(ptr %x)
+  br i1 %c, label %inner, label %end
+
+inner:
+  store i32 0, ptr %x, align 4
+  br label %end
+
+end:
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/181709


More information about the llvm-commits mailing list