[llvm] [DeadStoreElimination] Handle dominating conditions established by all predecessors (PR #181615)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 16 01:05:14 PST 2026
https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/181615
While optimizing tautological assignments, if there exists a dominating condition that implies the value being stored in a pointer, and such a condition appears either in its idom or in all of its predecessors, then subsequent stores may be redundant, if no write to such a pointer occurs in between.
Fixes: https://github.com/llvm/llvm-project/issues/86920.
>From 6f49b56af55b809ee9deedb4ab708d9bf3c02f98 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 12 Feb 2026 16:48:49 +0100
Subject: [PATCH 1/2] [DeadStoreElimination] Precommit tests (NFC)
---
.../DeadStoreElimination/noop-stores.ll | 174 ++++++++++++++++--
1 file changed, 162 insertions(+), 12 deletions(-)
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 283935d60a6da..708c5caf20ed3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -325,7 +325,7 @@ define ptr @zero_memset_after_malloc(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11:[0-9]+]]
; CHECK-NEXT: call void @clobber_memory(ptr [[CALL]])
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -339,7 +339,7 @@ define ptr @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
; based on pr25892_lite
define ptr @zero_memset_after_malloc_with_different_sizes(i64 %size) {
; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
; CHECK-NEXT: ret ptr [[CALL]]
@@ -376,9 +376,10 @@ define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
; This should create a customalloc_zeroed call and eliminate the memset
define ptr @customalloc_memset(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED]]
+;
%call = call ptr @customalloc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -390,9 +391,10 @@ declare ptr @customalloc_zeroed(i64, i64) allockind("alloc,zeroed") "alloc-famil
; This should create a customalloc_zeroed_custom_cc call and eliminate the memset while
; respecting the custom calling convention of the zeroed variant.
define cc99 ptr @customalloc_memset_custom_cc(i64 %size, i64 %align) {
-; CHECK-LABEL: @customalloc_memset_custom_cc
-; CHECK-NEXT: [[CALL:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
-; CHECK-NEXT: ret ptr [[CALL]]
+; CHECK-LABEL: @customalloc_memset_custom_cc(
+; CHECK-NEXT: [[CUSTOMALLOC_ZEROED_CUSTOM_CC:%.*]] = call cc99 ptr @customalloc_zeroed_custom_cc(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT: ret ptr [[CUSTOMALLOC_ZEROED_CUSTOM_CC]]
+;
%call = call cc99 ptr @customalloc_custom_cc(i64 %size, i64 %align)
call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
ret ptr %call
@@ -482,7 +484,7 @@ cleanup:
define ptr @malloc_with_no_nointer_null_check(i64 %0, i32 %1) {
; CHECK-LABEL: @malloc_with_no_nointer_null_check(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @malloc(i64 [[TMP0:%.*]]) #[[ATTR11]]
; CHECK-NEXT: [[A:%.*]] = and i32 [[TMP1:%.*]], 32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
@@ -507,7 +509,7 @@ cleanup:
; PR50143
define ptr @store_zero_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 1, i64 10) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 1, i64 10) inaccessiblememonly
@@ -600,7 +602,7 @@ define ptr @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64
define ptr @zero_memset_after_calloc_inaccessiblememonly() {
; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
-; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR7]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @calloc(i64 10000, i64 4) #[[ATTR11]]
; CHECK-NEXT: ret ptr [[CALL]]
;
%call = tail call ptr @calloc(i64 10000, i64 4) inaccessiblememonly
@@ -696,7 +698,7 @@ if.end:
define ptr @readnone_malloc() {
; CHECK-LABEL: @readnone_malloc(
-; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16) #[[ATTR12:[0-9]+]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[ALLOC]], i8 0, i64 16, i1 false)
; CHECK-NEXT: ret ptr [[ALLOC]]
;
@@ -1179,3 +1181,151 @@ if.else:
end:
ret void
}
+
+; Multiple predecessors w/ dom conditions implying the stored value.
+define void @remove_tautological_store_multi_preds(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: call void @unkown_write(ptr null)
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ call void @unkown_write(ptr null)
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Negative tests.
+
+; Store in between, %p being clobbered.
+define void @remove_tautological_store_multi_preds_clobbering_between(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_clobbering_between(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: store i32 1, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ store i32 1, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Not a MemoryPhi as defining access for the store, %p being clobbered.
+define void @remove_tautological_store_multi_preds_clobbering_between_2(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_clobbering_between_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: call void @unkown_write(ptr [[P]])
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp eq i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ call void @unkown_write(ptr %p)
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Different implying successor via icmp ne.
+define void @remove_tautological_store_multi_preds_cond_mixed(ptr %p) {
+; CHECK-LABEL: @remove_tautological_store_multi_preds_cond_mixed(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[L1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: else:
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[L2]], 0
+; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %l1 = load i32, ptr %p, align 4
+ %cmp1 = icmp eq i32 %l1, 0
+ br i1 %cmp1, label %then, label %else
+
+else:
+ %l2 = load i32, ptr %p, align 4
+ %cmp2 = icmp ne i32 %l2, 0
+ br i1 %cmp2, label %then, label %exit
+
+then:
+ store i32 0, ptr %p, align 4
+ br label %exit
+
+exit:
+ ret void
+}
>From ffe062564883a73802dfe9d2ff261d2201473de0 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 12 Feb 2026 17:00:09 +0100
Subject: [PATCH 2/2] [DeadStoreElimination] Handle dominating conditions
established by all predecessors
While optimizing tautological assignments, if there exists a dominating
condition that implies the value being stored in a pointer, and such a
condition appears either in its idom or in all of its predecessors,
then subsequent stores may be redundant, if no write to such a pointer
occurs in between.
Fixes: https://github.com/llvm/llvm-project/issues/86920.
---
.../Scalar/DeadStoreElimination.cpp | 109 +++++++++++-------
.../DeadStoreElimination/noop-stores.ll | 1 -
2 files changed, 68 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e056f0c1f6390..be9efdae32bec 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2140,55 +2140,82 @@ struct DSEState {
return true;
}
- // Check if there is a dominating condition, that implies that the value
- // being stored in a ptr is already present in the ptr.
- bool dominatingConditionImpliesValue(MemoryDef *Def) {
- auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
- BasicBlock *StoreBB = StoreI->getParent();
- Value *StorePtr = StoreI->getPointerOperand();
- Value *StoreVal = StoreI->getValueOperand();
-
- DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
- if (!IDom)
+ // If there is a dominating condition that implies the value being stored in a
+ // pointer, and such a condition appears either in its immediate dominator or
+ // in all of its predecessors, then the store may be redundant.
+ bool dominatingConditionImpliesValue(StoreInst *SI, MemoryDef *Def) {
+ BasicBlock *StoreBB = SI->getParent();
+ Value *StorePtr = SI->getPointerOperand();
+ Value *StoreVal = SI->getValueOperand();
+
+ unsigned NumPreds = pred_size(StoreBB);
+ if (!NumPreds || NumPreds > 4)
return false;
- auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
- if (!BI || !BI->isConditional())
- return false;
+ // Collect dominating conditions.
+ // TODO: May be possible to generalize this to perform a reverse DFS
+ // instead.
+ SmallDenseMap<BasicBlock *, Instruction *, 4> PredToLoad;
+ for (BasicBlock *PredBB : predecessors(StoreBB)) {
+ auto *BI = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
- // In case both blocks are the same, it is not possible to determine
- // if optimization is possible. (We would not want to optimize a store
- // in the FalseBB if condition is true and vice versa.)
- if (BI->getSuccessor(0) == BI->getSuccessor(1))
- return false;
+ // In case both blocks are the same, it is not possible to determine
+ // if optimization is possible. (We would not want to optimize a store
+ // in the FalseBB if condition is true and vice versa.)
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return false;
- Instruction *ICmpL;
- CmpPredicate Pred;
- if (!match(BI->getCondition(),
- m_c_ICmp(Pred,
- m_CombineAnd(m_Load(m_Specific(StorePtr)),
- m_Instruction(ICmpL)),
- m_Specific(StoreVal))) ||
- !ICmpInst::isEquality(Pred))
- return false;
+ Instruction *ICmpL;
+ CmpPredicate Pred;
+ if (!match(BI->getCondition(),
+ m_c_ICmp(Pred,
+ m_CombineAnd(m_Load(m_Specific(StorePtr)),
+ m_Instruction(ICmpL)),
+ m_Specific(StoreVal))) ||
+ !ICmpInst::isEquality(Pred))
+ return false;
- // In case the else blocks also branches to the if block or the other way
- // around it is not possible to determine if the optimization is possible.
- if (Pred == ICmpInst::ICMP_EQ &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
- StoreBB))
- return false;
+ BasicBlock *ImpliedSucc =
+ Pred == ICmpInst::ICMP_EQ ? BI->getSuccessor(0) : BI->getSuccessor(1);
+ if (ImpliedSucc != StoreBB)
+ return false;
- if (Pred == ICmpInst::ICMP_NE &&
- !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
- StoreBB))
+ PredToLoad[PredBB] = ICmpL;
+ }
+ assert(PredToLoad.size() == NumPreds);
+
+ MemoryAccess *DefiningAccess = Def->getDefiningAccess();
+ MemoryLocation StoreLoc = MemoryLocation::get(SI);
+
+ // Make sure there does not exist any clobbering access between the load and
+ // the potential redundant store.
+ auto IsLoadClobbered = [&](MemoryAccess *IncomingAcc, Instruction *LI) {
+ MemoryAccess *LoadAccess = MSSA.getMemoryAccess(LI);
+ MemoryAccess *ClobberingAccess =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(IncomingAcc,
+ StoreLoc);
+ return !MSSA.dominates(ClobberingAccess, LoadAccess);
+ };
+
+ if (NumPreds == 1)
+ if (!IsLoadClobbered(DefiningAccess,
+ PredToLoad[StoreBB->getSinglePredecessor()]))
+ return true;
+
+ // If we are not merging the memory reads from the predecessors, the memory
+ // location may be clobbered.
+ auto *MPhi = dyn_cast<MemoryPhi>(DefiningAccess);
+ if (!MPhi || MPhi->getBlock() != StoreBB)
return false;
- MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
- MemoryAccess *ClobAcc =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
+ for (unsigned I = 0; I < MPhi->getNumIncomingValues(); ++I)
+ if (IsLoadClobbered(MPhi->getIncomingValue(I),
+ PredToLoad[MPhi->getIncomingBlock(I)]))
+ return false;
- return MSSA.dominates(ClobAcc, LoadAcc);
+ return true;
}
/// \returns true if \p Def is a no-op store, either because it
@@ -2221,7 +2248,7 @@ struct DSEState {
if (!Store)
return false;
- if (dominatingConditionImpliesValue(Def))
+ if (dominatingConditionImpliesValue(Store, Def))
return true;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 708c5caf20ed3..76a34b25a906d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -1195,7 +1195,6 @@ define void @remove_tautological_store_multi_preds(ptr %p) {
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[L2]], 0
; CHECK-NEXT: br i1 [[CMP2]], label [[THEN]], label [[EXIT:%.*]]
; CHECK: then:
-; CHECK-NEXT: store i32 0, ptr [[P]], align 4
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list