[llvm] [DSE] Apply initializes attribute to DSE (PR #107282)

Haopeng Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 23 22:00:42 PDT 2024


https://github.com/haopliu updated https://github.com/llvm/llvm-project/pull/107282

>From a94a7341f921de06d65b1491eb5bc1a5e58970fe Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Wed, 4 Sep 2024 18:13:54 +0000
Subject: [PATCH 01/15] Apply initializes attribute to DSE

---
 .../Scalar/DeadStoreElimination.cpp           | 226 ++++++++++++++----
 .../DeadStoreElimination/inter-procedural.ll  | 159 ++++++++++++
 2 files changed, 343 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a37f295abbd31c..3ccb064adbf0df 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,6 +52,7 @@
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -164,6 +165,10 @@ static cl::opt<bool>
     OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                       cl::desc("Allow DSE to optimize memory accesses."));
 
+static cl::opt<bool> EnableInitializesImprovement(
+    "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
+    cl::desc("Enable the initializes attr improvement in DSE"));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -809,8 +814,10 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
 // A memory location wrapper that represents a MemoryLocation, `MemLoc`,
 // defined by `MemDef`.
 struct MemoryLocationWrapper {
-  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef)
-      : MemLoc(MemLoc), MemDef(MemDef) {
+  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
+                        bool DefByInitializesAttr)
+      : MemLoc(MemLoc), MemDef(MemDef),
+        DefByInitializesAttr(DefByInitializesAttr) {
     assert(MemLoc.Ptr && "MemLoc should be not null");
     UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
     DefInst = MemDef->getMemoryInst();
@@ -820,20 +827,121 @@ struct MemoryLocationWrapper {
   const Value *UnderlyingObject;
   MemoryDef *MemDef;
   Instruction *DefInst;
+  bool DefByInitializesAttr = false;
 };
 
 // A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
 // defined by this MemoryDef.
 struct MemoryDefWrapper {
-  MemoryDefWrapper(MemoryDef *MemDef, std::optional<MemoryLocation> MemLoc) {
+  MemoryDefWrapper(
+      MemoryDef *MemDef,
+      const SmallVectorImpl<std::pair<MemoryLocation, bool>> &MemLocations) {
     DefInst = MemDef->getMemoryInst();
-    if (MemLoc.has_value())
-      DefinedLocation = MemoryLocationWrapper(*MemLoc, MemDef);
+    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
+      DefinedLocations.push_back(
+          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
   }
   Instruction *DefInst;
-  std::optional<MemoryLocationWrapper> DefinedLocation = std::nullopt;
+  SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
+};
+
+bool HasInitializesAttr(Instruction *I) {
+  CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return false;
+
+  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+    if (CB->paramHasAttr(Idx, Attribute::Initializes))
+      return true;
+  return false;
+}
+
+struct ArgumentInitInfo {
+  size_t Idx = -1;
+  ConstantRangeList Inits;
+  bool HasDeadOnUnwindAttr = false;
+  bool FuncHasNoUnwindAttr = false;
 };
 
+ConstantRangeList
+GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+  if (Args.empty())
+    return {};
+
+  // To address unwind, the function should have nounwind attribute or the
+  // arguments have dead_on_unwind attribute. Otherwise, return empty.
+  for (const auto &Arg : Args) {
+    if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+      return {};
+    if (Arg.Inits.empty())
+      return {};
+  }
+
+  if (Args.size() == 1)
+    return Args[0].Inits;
+
+  ConstantRangeList MergedIntervals = Args[0].Inits;
+  for (size_t i = 1; i < Args.size(); i++)
+    MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+
+  return MergedIntervals;
+}
+
+// Return the locations wrote by the initializes attribute.
+// Note that this function considers:
+// 1. Unwind edge: apply "initializes" attribute only if the callee has
+//    "nounwind" attribute or the argument has "dead_on_unwind" attribute.
+// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+//    the merged range list of their "initializes" attributes.
+SmallVector<MemoryLocation, 1>
+GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+  const CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return {};
+
+  // Collect aliasing arguments and their initializes ranges.
+  bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
+  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
+  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+    ConstantRangeList Inits;
+    if (CB->paramHasAttr(Idx, Attribute::Initializes))
+      Inits = CB->getParamAttr(Idx, Attribute::Initializes)
+                  .getValueAsConstantRangeList();
+
+    bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
+    ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+    Value *CurArg = CB->getArgOperand(Idx);
+    bool FoundAliasing = false;
+    for (auto &[Arg, AliasList] : Arguments) {
+      if (BatchAA.isMustAlias(Arg, CurArg)) {
+        FoundAliasing = true;
+        AliasList.push_back(InitInfo);
+      }
+    }
+    if (!FoundAliasing)
+      Arguments[CurArg] = {InitInfo};
+  }
+
+  SmallVector<MemoryLocation, 1> Locations;
+  for (const auto &[_, Args] : Arguments) {
+    auto MergedInitAttr = GetMergedInitAttr(Args);
+    if (MergedInitAttr.empty())
+      continue;
+
+    for (const auto &Arg : Args) {
+      for (const auto &Range : MergedInitAttr) {
+        int64_t Start = Range.getLower().getSExtValue();
+        int64_t End = Range.getUpper().getSExtValue();
+        if (Start == 0)
+          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
+                                             LocationSize::precise(End - Start),
+                                             CB->getAAMetadata()));
+      }
+    }
+  }
+  return Locations;
+}
+
 struct DSEState {
   Function &F;
   AliasAnalysis &AA;
@@ -911,7 +1019,8 @@ struct DSEState {
 
         auto *MD = dyn_cast_or_null<MemoryDef>(MA);
         if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
-            (getLocForWrite(&I) || isMemTerminatorInst(&I)))
+            (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
+             HasInitializesAttr(&I)))
           MemDefs.push_back(MD);
       }
     }
@@ -1147,13 +1256,26 @@ struct DSEState {
     return MemoryLocation::getOrNone(I);
   }
 
-  std::optional<MemoryLocation> getLocForInst(Instruction *I) {
+  // Returns a list of <MemoryLocation, bool> pairs wrote by I.
+  // The bool means whether the write is from Initializes attr.
+  SmallVector<std::pair<MemoryLocation, bool>, 1>
+  getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
+    SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
     if (isMemTerminatorInst(I)) {
-      if (auto Loc = getLocForTerminator(I)) {
-        return Loc->first;
+      if (auto Loc = getLocForTerminator(I))
+        Locations.push_back(std::make_pair(Loc->first, false));
+      return Locations;
+    }
+
+    if (auto Loc = getLocForWrite(I))
+      Locations.push_back(std::make_pair(*Loc, false));
+
+    if (ConsiderInitializesAttr) {
+      for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+        Locations.push_back(std::make_pair(MemLoc, true));
       }
     }
-    return getLocForWrite(I);
+    return Locations;
   }
 
   /// Assuming this instruction has a dead analyzable write, can we delete
@@ -1365,7 +1487,8 @@ struct DSEState {
   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                   const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                   unsigned &ScanLimit, unsigned &WalkerStepLimit,
-                  bool IsMemTerm, unsigned &PartialLimit) {
+                  bool IsMemTerm, unsigned &PartialLimit,
+                  bool IsInitializesAttrMemLoc) {
     if (ScanLimit == 0 || WalkerStepLimit == 0) {
       LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
       return std::nullopt;
@@ -1602,7 +1725,17 @@ struct DSEState {
 
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
-      if (isReadClobber(MaybeDeadLoc, UseInst)) {
+      // If KillingDef is a CallInst with "initializes" attribute, the reads in
+      // Callee would be dominated by initializations, so this should be safe.
+      bool IsKillingDefFromInitAttr = false;
+      if (IsInitializesAttrMemLoc) {
+        if (KillingI == UseInst &&
+            KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
+          IsKillingDefFromInitAttr = true;
+        }
+      }
+
+      if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
         LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
         return std::nullopt;
       }
@@ -2207,7 +2340,8 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
     std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
         KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
         KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
-        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit);
+        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
+        KillingLocWrapper.DefByInitializesAttr);
 
     if (!MaybeDeadAccess) {
       LLVM_DEBUG(dbgs() << "  finished walk\n");
@@ -2232,8 +2366,11 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
     }
     MemoryDefWrapper DeadDefWrapper(
         cast<MemoryDef>(DeadAccess),
-        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst()));
-    MemoryLocationWrapper &DeadLocWrapper = *DeadDefWrapper.DefinedLocation;
+        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
+                      /*ConsiderInitializesAttr=*/false));
+    assert(DeadDefWrapper.DefinedLocations.size() == 1);
+    MemoryLocationWrapper &DeadLocWrapper =
+        DeadDefWrapper.DefinedLocations.front();
     LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
     ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
     NumGetDomMemoryDefPassed++;
@@ -2311,37 +2448,41 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
 }
 
 bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
-  if (!KillingDefWrapper.DefinedLocation.has_value()) {
+  if (KillingDefWrapper.DefinedLocations.empty()) {
     LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                       << *KillingDefWrapper.DefInst << "\n");
     return false;
   }
 
-  auto &KillingLocWrapper = *KillingDefWrapper.DefinedLocation;
-  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
-                    << *KillingLocWrapper.MemDef << " ("
-                    << *KillingLocWrapper.DefInst << ")\n");
-  auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
-
-  // Check if the store is a no-op.
-  if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
-                                        KillingLocWrapper.UnderlyingObject)) {
-    LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
-                      << *KillingLocWrapper.DefInst << '\n');
-    deleteDeadInstruction(KillingLocWrapper.DefInst);
-    NumRedundantStores++;
-    return true;
-  }
-  // Can we form a calloc from a memset/malloc pair?
-  if (!DeletedKillingLoc &&
-      tryFoldIntoCalloc(KillingLocWrapper.MemDef,
-                        KillingLocWrapper.UnderlyingObject)) {
-    LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
-                      << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
-    deleteDeadInstruction(KillingLocWrapper.DefInst);
-    return true;
+  bool MadeChange = false;
+  for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
+    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
+                      << *KillingLocWrapper.MemDef << " ("
+                      << *KillingLocWrapper.DefInst << ")\n");
+    auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
+
+    // Check if the store is a no-op.
+    if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
+                                          KillingLocWrapper.UnderlyingObject)) {
+      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
+                        << *KillingLocWrapper.DefInst << '\n');
+      deleteDeadInstruction(KillingLocWrapper.DefInst);
+      NumRedundantStores++;
+      MadeChange = true;
+      continue;
+    }
+    // Can we form a calloc from a memset/malloc pair?
+    if (!DeletedKillingLoc &&
+        tryFoldIntoCalloc(KillingLocWrapper.MemDef,
+                          KillingLocWrapper.UnderlyingObject)) {
+      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
+                        << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
+      deleteDeadInstruction(KillingLocWrapper.DefInst);
+      MadeChange = true;
+      continue;
+    }
   }
-  return Changed;
+  return MadeChange;
 }
 
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -2357,7 +2498,8 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
       continue;
 
     MemoryDefWrapper KillingDefWrapper(
-        KillingDef, State.getLocForInst(KillingDef->getMemoryInst()));
+        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
+                                        EnableInitializesImprovement));
     MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
   }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
new file mode 100644
index 00000000000000..c4ff69af9051bc
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+
+declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
+declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
+declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_only_caller() {
+; CHECK-LABEL: @p1_write_only_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_only(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p1_write_only(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_then_read_caller() {
+; CHECK-LABEL: @p1_write_then_read_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_then_read(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p1_write_then_read(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_nonalias_caller() {
+; CHECK-LABEL: @p2_same_range_nonalias_caller(
+; CHECK-NEXT:    %ptr1 = alloca i16, align 2
+; CHECK-NEXT:    %ptr2 = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT:    %l = load i16, ptr %ptr1
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr1 = alloca i16
+  %ptr2 = alloca i16
+  store i16 0, ptr %ptr1
+  store i16 0, ptr %ptr2
+  call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+  %l = load i16, ptr %ptr1
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_alias_caller() {
+; CHECK-LABEL: @p2_same_range_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_same_range(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_init_alias_caller() {
+; CHECK-LABEL: @p2_no_init_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr %ptr
+; CHECK-NEXT:    call void @p2_no_init(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_no_init(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr %ptr
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
+declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p1_caller() {
+; CHECK-LABEL: @large_p1_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p1(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
+  call void @large_p1(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_nonalias_caller() {
+; CHECK-LABEL: @large_p2_nonalias_caller(
+; CHECK-NEXT:    %ptr1 = alloca i16, align 2
+; CHECK-NEXT:    %ptr2 = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p2(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT:    %l = load i16, ptr %ptr1
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr1 = alloca i16
+  %ptr2 = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false)
+  call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false)
+  call void @large_p2(ptr %ptr1, ptr %ptr2)
+  %l = load i16, ptr %ptr1
+  ret i16 %l
+}
+
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_alias_caller() {
+; CHECK-LABEL: @large_p2_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    %1 = getelementptr inbounds i8, ptr %ptr, i64 100
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 %1, i8 42, i64 200, i1 false)
+; CHECK-NEXT:    call void @large_p2(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)
+  call void @large_p2(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+

>From 002d984bef56e88556405a94ecec3c9e2bb0d846 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 04:56:11 +0000
Subject: [PATCH 02/15] Update comments and unit test

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp           | 4 ++++
 llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 3ccb064adbf0df..fd178bc7238c54 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -165,6 +165,7 @@ static cl::opt<bool>
     OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                       cl::desc("Allow DSE to optimize memory accesses."));
 
+// TODO: turn on and remove this flag.
 static cl::opt<bool> EnableInitializesImprovement(
     "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
     cl::desc("Enable the initializes attr improvement in DSE"));
@@ -2364,6 +2365,9 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
       }
       continue;
     }
+    // We cannot apply the initializes attribute to DeadAccess/DeadDef.
+    // It would incorrectly consider a call instruction as redundant store
+    // and remove this call instruction.
     MemoryDefWrapper DeadDefWrapper(
         cast<MemoryDef>(DeadAccess),
         getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index c4ff69af9051bc..6975670379faf9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+; RUN: opt < %s -passes=dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
 
 declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
 declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)

>From eed0dff3942831e0bf1a873bf691f0b4bcae268c Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 20:52:25 +0000
Subject: [PATCH 03/15] Fix loop format, index type, and clean up
 ArgumentInitInfo struct members

---
 .../Scalar/DeadStoreElimination.cpp           | 47 ++++++++++---------
 .../DeadStoreElimination/inter-procedural.ll  | 16 +++++++
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fd178bc7238c54..cf827417df191d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -846,33 +846,37 @@ struct MemoryDefWrapper {
   SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
 };
 
-bool HasInitializesAttr(Instruction *I) {
+bool hasInitializesAttr(Instruction *I) {
   CallBase *CB = dyn_cast<CallBase>(I);
   if (!CB)
     return false;
 
-  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx)
     if (CB->paramHasAttr(Idx, Attribute::Initializes))
       return true;
   return false;
 }
 
 struct ArgumentInitInfo {
-  size_t Idx = -1;
+  unsigned Idx;
+  bool HasDeadOnUnwindAttr;
   ConstantRangeList Inits;
-  bool HasDeadOnUnwindAttr = false;
-  bool FuncHasNoUnwindAttr = false;
 };
 
+// Return the intersected range list of the initializes attributes of "Args".
+// "Args" are call arguments that alias to each other.
+// If any argument in "Args" doesn't have dead_on_unwind attr and
+// "FuncHasNoUnwindAttr" is false, return empty.
 ConstantRangeList
-GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
+                            bool FuncHasNoUnwindAttr) {
   if (Args.empty())
     return {};
 
   // To address unwind, the function should have nounwind attribute or the
   // arguments have dead_on_unwind attribute. Otherwise, return empty.
   for (const auto &Arg : Args) {
-    if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+    if (!FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
       return {};
     if (Arg.Inits.empty())
       return {};
@@ -881,11 +885,11 @@ GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
   if (Args.size() == 1)
     return Args[0].Inits;
 
-  ConstantRangeList MergedIntervals = Args[0].Inits;
-  for (size_t i = 1; i < Args.size(); i++)
-    MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+  ConstantRangeList IntersectedIntervals = Args[0].Inits;
+  for (unsigned I = 1, Count = Args.size(); I < Count; ++I)
+    IntersectedIntervals = IntersectedIntervals.intersectWith(Args[I].Inits);
 
-  return MergedIntervals;
+  return IntersectedIntervals;
 }
 
 // Return the locations wrote by the initializes attribute.
@@ -893,24 +897,23 @@ GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
 // 1. Unwind edge: apply "initializes" attribute only if the callee has
 //    "nounwind" attribute or the argument has "dead_on_unwind" attribute.
 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
-//    the merged range list of their "initializes" attributes.
+//    the intersected range list of their "initializes" attributes.
 SmallVector<MemoryLocation, 1>
-GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
   const CallBase *CB = dyn_cast<CallBase>(I);
   if (!CB)
     return {};
 
   // Collect aliasing arguments and their initializes ranges.
-  bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
   SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
-  for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
     ConstantRangeList Inits;
     if (CB->paramHasAttr(Idx, Attribute::Initializes))
       Inits = CB->getParamAttr(Idx, Attribute::Initializes)
                   .getValueAsConstantRangeList();
 
     bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
-    ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+    ArgumentInitInfo InitInfo{Idx, HasDeadOnUnwindAttr, Inits};
     Value *CurArg = CB->getArgOperand(Idx);
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {
@@ -925,14 +928,16 @@ GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
 
   SmallVector<MemoryLocation, 1> Locations;
   for (const auto &[_, Args] : Arguments) {
-    auto MergedInitAttr = GetMergedInitAttr(Args);
-    if (MergedInitAttr.empty())
+    auto IntersectedRanges =
+        getIntersectedInitRangeList(Args, CB->hasFnAttr(Attribute::NoUnwind));
+    if (IntersectedRanges.empty())
       continue;
 
     for (const auto &Arg : Args) {
-      for (const auto &Range : MergedInitAttr) {
+      for (const auto &Range : IntersectedRanges) {
         int64_t Start = Range.getLower().getSExtValue();
         int64_t End = Range.getUpper().getSExtValue();
+        // For now, we only handle locations starting at offset 0.
         if (Start == 0)
           Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
                                              LocationSize::precise(End - Start),
@@ -1021,7 +1026,7 @@ struct DSEState {
         auto *MD = dyn_cast_or_null<MemoryDef>(MA);
         if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
             (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
-             HasInitializesAttr(&I)))
+             (EnableInitializesImprovement && hasInitializesAttr(&I))))
           MemDefs.push_back(MD);
       }
     }
@@ -1272,7 +1277,7 @@ struct DSEState {
       Locations.push_back(std::make_pair(*Loc, false));
 
     if (ConsiderInitializesAttr) {
-      for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+      for (auto &MemLoc : getInitializesArgMemLoc(I, BatchAA)) {
         Locations.push_back(std::make_pair(MemLoc, true));
       }
     }
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 6975670379faf9..00c9633f123b98 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -6,6 +6,7 @@ declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_
 declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
 declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
 declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+declare void @p2_no_dead_on_unwind_but_nounwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2))) nounwind
 
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p1_write_only_caller() {
@@ -102,6 +103,21 @@ define i16 @p2_no_dead_on_unwind_alias_caller() {
   ret i16 %l
 }
 
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_but_nounwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_but_nounwind_alias_caller(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
 declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind

>From e8163c9c87197a5b4aadae37d3dbfc3c621c52c2 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 21:01:26 +0000
Subject: [PATCH 04/15] Change 'Count = Args.size()' to size_t type

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cf827417df191d..caf4d97086bdca 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -886,7 +886,7 @@ getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
     return Args[0].Inits;
 
   ConstantRangeList IntersectedIntervals = Args[0].Inits;
-  for (unsigned I = 1, Count = Args.size(); I < Count; ++I)
+  for (size_t I = 1, Count = Args.size(); I < Count; ++I)
     IntersectedIntervals = IntersectedIntervals.intersectWith(Args[I].Inits);
 
   return IntersectedIntervals;

>From 7e6f960a83a44c04419978b261acedeedb55d49c Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Tue, 10 Sep 2024 04:27:13 +0000
Subject: [PATCH 05/15] Handle may/partial alias and clean up code

---
 llvm/include/llvm/Analysis/AliasAnalysis.h    |  5 +++
 .../Scalar/DeadStoreElimination.cpp           | 42 +++++++++----------
 .../DeadStoreElimination/inter-procedural.ll  | 19 +++++++++
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 1990172116582b..a1a084de621358 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -667,6 +667,11 @@ class BatchAAResults {
                  MemoryLocation(V2, LocationSize::precise(1))) ==
            AliasResult::MustAlias;
   }
+  bool isNoAlias(const Value *V1, const Value *V2) {
+    return alias(MemoryLocation(V1, LocationSize::precise(1)),
+                 MemoryLocation(V2, LocationSize::precise(1))) ==
+           AliasResult::NoAlias;
+  }
   ModRefInfo callCapturesBefore(const Instruction *I,
                                 const MemoryLocation &MemLoc,
                                 DominatorTree *DT) {
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index caf4d97086bdca..fb1499e4f758c8 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -834,9 +834,8 @@ struct MemoryLocationWrapper {
 // A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
 // defined by this MemoryDef.
 struct MemoryDefWrapper {
-  MemoryDefWrapper(
-      MemoryDef *MemDef,
-      const SmallVectorImpl<std::pair<MemoryLocation, bool>> &MemLocations) {
+  MemoryDefWrapper(MemoryDef *MemDef,
+                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
     DefInst = MemDef->getMemoryInst();
     for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
       DefinedLocations.push_back(
@@ -866,17 +865,16 @@ struct ArgumentInitInfo {
 // Return the intersected range list of the initializes attributes of "Args".
 // "Args" are call arguments that alias to each other.
 // If any argument in "Args" doesn't have dead_on_unwind attr and
-// "FuncHasNoUnwindAttr" is false, return empty.
-ConstantRangeList
-getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
-                            bool FuncHasNoUnwindAttr) {
+// "CallHasNoUnwindAttr" is false, return empty.
+ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
+                                              bool CallHasNoUnwindAttr) {
   if (Args.empty())
     return {};
 
   // To address unwind, the function should have nounwind attribute or the
   // arguments have dead_on_unwind attribute. Otherwise, return empty.
   for (const auto &Arg : Args) {
-    if (!FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+    if (!CallHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
       return {};
     if (Arg.Inits.empty())
       return {};
@@ -885,14 +883,14 @@ getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
   if (Args.size() == 1)
     return Args[0].Inits;
 
-  ConstantRangeList IntersectedIntervals = Args[0].Inits;
-  for (size_t I = 1, Count = Args.size(); I < Count; ++I)
-    IntersectedIntervals = IntersectedIntervals.intersectWith(Args[I].Inits);
+  ConstantRangeList IntersectedIntervals = Args.front().Inits;
+  for (auto &Arg : Args.drop_front())
+    IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
 
   return IntersectedIntervals;
 }
 
-// Return the locations wrote by the initializes attribute.
+// Return the locations written by the initializes attribute.
 // Note that this function considers:
 // 1. Unwind edge: apply "initializes" attribute only if the callee has
 //    "nounwind" attribute or the argument has "dead_on_unwind" attribute.
@@ -908,19 +906,20 @@ getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
   SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
   for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
     ConstantRangeList Inits;
-    if (CB->paramHasAttr(Idx, Attribute::Initializes))
-      Inits = CB->getParamAttr(Idx, Attribute::Initializes)
-                  .getValueAsConstantRangeList();
+    Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
+    if (InitializesAttr.isValid())
+      Inits = InitializesAttr.getValueAsConstantRangeList();
 
     bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
     ArgumentInitInfo InitInfo{Idx, HasDeadOnUnwindAttr, Inits};
     Value *CurArg = CB->getArgOperand(Idx);
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {
-      if (BatchAA.isMustAlias(Arg, CurArg)) {
-        FoundAliasing = true;
-        AliasList.push_back(InitInfo);
-      }
+      if (BatchAA.isNoAlias(Arg, CurArg))
+        continue;
+      // Conservatively consider must/may/partial-alias as aliasing.
+      FoundAliasing = true;
+      AliasList.push_back(InitInfo);
     }
     if (!FoundAliasing)
       Arguments[CurArg] = {InitInfo};
@@ -929,7 +928,7 @@ getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
   SmallVector<MemoryLocation, 1> Locations;
   for (const auto &[_, Args] : Arguments) {
     auto IntersectedRanges =
-        getIntersectedInitRangeList(Args, CB->hasFnAttr(Attribute::NoUnwind));
+        getIntersectedInitRangeList(Args, CB->doesNotThrow());
     if (IntersectedRanges.empty())
       continue;
 
@@ -1736,9 +1735,8 @@ struct DSEState {
       bool IsKillingDefFromInitAttr = false;
       if (IsInitializesAttrMemLoc) {
         if (KillingI == UseInst &&
-            KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
+            KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
           IsKillingDefFromInitAttr = true;
-        }
       }
 
       if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 00c9633f123b98..974ccdff13c835 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -3,6 +3,7 @@
 
 declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
 declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p1_clobber(ptr nocapture noundef)
 declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
 declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
 declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
@@ -38,6 +39,24 @@ define i16 @p1_write_then_read_caller() {
   ret i16 %l
 }
 
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_then_read_caller_with_clobber() {
+; CHECK-LABEL: @p1_write_then_read_caller_with_clobber(
+; CHECK-NEXT:    %ptr = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr %ptr
+; CHECK-NEXT:    call void @p1_clobber(ptr %ptr)
+; CHECK-NEXT:    call void @p1_write_then_read(ptr %ptr)
+; CHECK-NEXT:    %l = load i16, ptr %ptr
+; CHECK-NEXT:    ret i16 %l
+;
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  call void @p1_clobber(ptr %ptr)
+  call void @p1_write_then_read(ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_same_range_nonalias_caller() {
 ; CHECK-LABEL: @p2_same_range_nonalias_caller(

>From debf11fc92d1b46a5e5bc01bc19d809322389e52 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Tue, 10 Sep 2024 05:31:40 +0000
Subject: [PATCH 06/15] Early return in getLocForInst()

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fb1499e4f758c8..2f998662123594 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1272,14 +1272,15 @@ struct DSEState {
       return Locations;
     }
 
-    if (auto Loc = getLocForWrite(I))
-      Locations.push_back(std::make_pair(*Loc, false));
-
     if (ConsiderInitializesAttr) {
       for (auto &MemLoc : getInitializesArgMemLoc(I, BatchAA)) {
         Locations.push_back(std::make_pair(MemLoc, true));
       }
+      return Locations;
     }
+
+    if (auto Loc = getLocForWrite(I))
+      Locations.push_back(std::make_pair(*Loc, false));
     return Locations;
   }
 

>From 72dcab31b78224eff20a86efd7319b41b3da3ed2 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Wed, 11 Sep 2024 18:15:12 +0000
Subject: [PATCH 07/15] Insert an empty init range for May/PartialAlias
 arguments

---
 .../Scalar/DeadStoreElimination.cpp           |  25 +--
 .../DeadStoreElimination/inter-procedural.ll  | 148 +++++++++++-------
 2 files changed, 104 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 2f998662123594..3a7d11e33fc8ac 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -847,13 +847,8 @@ struct MemoryDefWrapper {
 
 bool hasInitializesAttr(Instruction *I) {
   CallBase *CB = dyn_cast<CallBase>(I);
-  if (!CB)
-    return false;
-
-  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx)
-    if (CB->paramHasAttr(Idx, Attribute::Initializes))
-      return true;
-  return false;
+  return CB != nullptr &&
+         CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr;
 }
 
 struct ArgumentInitInfo {
@@ -915,11 +910,19 @@ getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
     Value *CurArg = CB->getArgOperand(Idx);
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {
-      if (BatchAA.isNoAlias(Arg, CurArg))
+      if (BatchAA.isNoAlias(Arg, CurArg)) {
         continue;
-      // Conservatively consider must/may/partial-alias as aliasing.
-      FoundAliasing = true;
-      AliasList.push_back(InitInfo);
+      } else if (BatchAA.isMustAlias(Arg, CurArg)) {
+        FoundAliasing = true;
+        AliasList.push_back(InitInfo);
+      } else {
+        // For ParitialAlias and MayAlias, there is an offset or may be an
+        // unknown offset between the arguments and we insert an empty init
+        // range to discard the entire initializes info while intersecting.
+        FoundAliasing = true;
+        AliasList.push_back(
+            ArgumentInitInfo{Idx, HasDeadOnUnwindAttr, ConstantRangeList()});
+      }
     }
     if (!FoundAliasing)
       Arguments[CurArg] = {InitInfo};
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 974ccdff13c835..9953908f240cb9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -12,10 +12,10 @@ declare void @p2_no_dead_on_unwind_but_nounwind(ptr nocapture noundef initialize
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p1_write_only_caller() {
 ; CHECK-LABEL: @p1_write_only_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    call void @p1_write_only(ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_only(ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -27,10 +27,10 @@ define i16 @p1_write_only_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p1_write_then_read_caller() {
 ; CHECK-LABEL: @p1_write_then_read_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    call void @p1_write_then_read(ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @p1_write_then_read(ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -42,12 +42,12 @@ define i16 @p1_write_then_read_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p1_write_then_read_caller_with_clobber() {
 ; CHECK-LABEL: @p1_write_then_read_caller_with_clobber(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    store i16 0, ptr %ptr
-; CHECK-NEXT:    call void @p1_clobber(ptr %ptr)
-; CHECK-NEXT:    call void @p1_write_then_read(ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr [[PTR]], align 2
+; CHECK-NEXT:    call void @p1_clobber(ptr [[PTR]])
+; CHECK-NEXT:    call void @p1_write_then_read(ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -58,13 +58,13 @@ define i16 @p1_write_then_read_caller_with_clobber() {
 }
 
 ; Function Attrs: mustprogress nounwind uwtable
-define i16 @p2_same_range_nonalias_caller() {
-; CHECK-LABEL: @p2_same_range_nonalias_caller(
-; CHECK-NEXT:    %ptr1 = alloca i16, align 2
-; CHECK-NEXT:    %ptr2 = alloca i16, align 2
-; CHECK-NEXT:    call void @p2_same_range(ptr %ptr1, ptr %ptr2)
-; CHECK-NEXT:    %l = load i16, ptr %ptr1
-; CHECK-NEXT:    ret i16 %l
+define i16 @p2_same_range_noalias_caller() {
+; CHECK-LABEL: @p2_same_range_noalias_caller(
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr [[PTR1]], ptr [[PTR2]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR1]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr1 = alloca i16
   %ptr2 = alloca i16
@@ -76,12 +76,12 @@ define i16 @p2_same_range_nonalias_caller() {
 }
 
 ; Function Attrs: mustprogress nounwind uwtable
-define i16 @p2_same_range_alias_caller() {
-; CHECK-LABEL: @p2_same_range_alias_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    call void @p2_same_range(ptr %ptr, ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+define i16 @p2_same_range_must_alias_caller() {
+; CHECK-LABEL: @p2_same_range_must_alias_caller(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_same_range(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -90,14 +90,46 @@ define i16 @p2_same_range_alias_caller() {
   ret i16 %l
 }
 
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_may_or_partial_alias_caller1(ptr %base, i1 %x) {
+; CHECK-LABEL: @p2_same_range_may_or_partial_alias_caller1(
+; CHECK-NEXT:    [[BASEPLUS:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[X:%.*]], ptr [[BASEPLUS]], ptr [[BASE]]
+; CHECK-NEXT:    store i32 0, ptr [[BASE]], align 4
+; CHECK-NEXT:    call void @p2_same_range(ptr [[BASE]], ptr [[SEL]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[BASE]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+  %baseplus = getelementptr i8, ptr %base, i64 1
+  %sel = select i1 %x, ptr %baseplus, ptr %base
+  store i32 0, ptr %base
+  call void @p2_same_range(ptr %base, ptr %sel)
+  %l = load i16, ptr %base
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_may_or_partial_alias_caller2(ptr %base1, ptr %base2) {
+; CHECK-LABEL: @p2_same_range_may_or_partial_alias_caller2(
+; CHECK-NEXT:    store i32 0, ptr [[BASE1:%.*]], align 4
+; CHECK-NEXT:    call void @p2_same_range(ptr [[BASE1]], ptr [[BASE2:%.*]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[BASE1]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+  store i32 0, ptr %base1
+  call void @p2_same_range(ptr %base1, ptr %base2)
+  %l = load i16, ptr %base1
+  ret i16 %l
+}
+
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_no_init_alias_caller() {
 ; CHECK-LABEL: @p2_no_init_alias_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    store i16 0, ptr %ptr
-; CHECK-NEXT:    call void @p2_no_init(ptr %ptr, ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr [[PTR]], align 2
+; CHECK-NEXT:    call void @p2_no_init(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -109,11 +141,11 @@ define i16 @p2_no_init_alias_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_no_dead_on_unwind_alias_caller() {
 ; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    store i16 0, ptr %ptr
-; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr [[PTR]], align 2
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -125,10 +157,10 @@ define i16 @p2_no_dead_on_unwind_alias_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_no_dead_on_unwind_but_nounwind_alias_caller() {
 ; CHECK-LABEL: @p2_no_dead_on_unwind_but_nounwind_alias_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind_but_nounwind(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   store i16 0, ptr %ptr
@@ -144,10 +176,10 @@ declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocaptur
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @large_p1_caller() {
 ; CHECK-LABEL: @large_p1_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    call void @large_p1(ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p1(ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
@@ -159,11 +191,11 @@ define i16 @large_p1_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @large_p2_nonalias_caller() {
 ; CHECK-LABEL: @large_p2_nonalias_caller(
-; CHECK-NEXT:    %ptr1 = alloca i16, align 2
-; CHECK-NEXT:    %ptr2 = alloca i16, align 2
-; CHECK-NEXT:    call void @large_p2(ptr %ptr1, ptr %ptr2)
-; CHECK-NEXT:    %l = load i16, ptr %ptr1
-; CHECK-NEXT:    ret i16 %l
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    call void @large_p2(ptr [[PTR1]], ptr [[PTR2]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR1]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr1 = alloca i16
   %ptr2 = alloca i16
@@ -176,14 +208,14 @@ define i16 @large_p2_nonalias_caller() {
 
 
 ; Function Attrs: mustprogress nounwind uwtable
-define i16 @large_p2_alias_caller() {
-; CHECK-LABEL: @large_p2_alias_caller(
-; CHECK-NEXT:    %ptr = alloca i16, align 2
-; CHECK-NEXT:    %1 = getelementptr inbounds i8, ptr %ptr, i64 100
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 %1, i8 42, i64 200, i1 false)
-; CHECK-NEXT:    call void @large_p2(ptr %ptr, ptr %ptr)
-; CHECK-NEXT:    %l = load i16, ptr %ptr
-; CHECK-NEXT:    ret i16 %l
+define i16 @large_p2_must_alias_caller() {
+; CHECK-LABEL: @large_p2_must_alias_caller(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 100
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 42, i64 200, i1 false)
+; CHECK-NEXT:    call void @large_p2(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
 ;
   %ptr = alloca i16
   call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)

>From f6601100a7a29bc4fbdabecfc23b2e7a09052ae3 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 12 Sep 2024 06:08:17 +0000
Subject: [PATCH 08/15] Fix a mistake in getLocForInst: add getLocForWrite
 regardless of whether ConsiderInitializesAttr is true or false

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 3a7d11e33fc8ac..b031d1a3723c3a 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1275,15 +1275,14 @@ struct DSEState {
       return Locations;
     }
 
+    if (auto Loc = getLocForWrite(I))
+      Locations.push_back(std::make_pair(*Loc, false));
+
     if (ConsiderInitializesAttr) {
       for (auto &MemLoc : getInitializesArgMemLoc(I, BatchAA)) {
         Locations.push_back(std::make_pair(MemLoc, true));
       }
-      return Locations;
     }
-
-    if (auto Loc = getLocForWrite(I))
-      Locations.push_back(std::make_pair(*Loc, false));
     return Locations;
   }
 

>From e9c9941a8e3824488db9f874de45ff9c7b5273db Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 12 Sep 2024 23:51:34 +0000
Subject: [PATCH 09/15] Use MemoryLocation::getBeforeOrAfter() in
 BatchAA.isNoAlias(Value*, Value*)

---
 llvm/include/llvm/Analysis/AliasAnalysis.h    |  5 +-
 .../Scalar/DeadStoreElimination.cpp           |  2 +-
 .../DeadStoreElimination/inter-procedural.ll  | 46 +++++++++++++++----
 3 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index a1a084de621358..e46c76f4317a5a 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -668,9 +668,8 @@ class BatchAAResults {
            AliasResult::MustAlias;
   }
   bool isNoAlias(const Value *V1, const Value *V2) {
-    return alias(MemoryLocation(V1, LocationSize::precise(1)),
-                 MemoryLocation(V2, LocationSize::precise(1))) ==
-           AliasResult::NoAlias;
+    return alias(MemoryLocation::getBeforeOrAfter(V1),
+                 MemoryLocation::getBeforeOrAfter(V2)) == AliasResult::NoAlias;
   }
   ModRefInfo callCapturesBefore(const Instruction *I,
                                 const MemoryLocation &MemLoc,
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b031d1a3723c3a..3fad6fccc5d05b 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -916,7 +916,7 @@ getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
         FoundAliasing = true;
         AliasList.push_back(InitInfo);
       } else {
-        // For ParitialAlias and MayAlias, there is an offset or may be an
+        // For PartialAlias and MayAlias, there is an offset or may be an
         // unknown offset between the arguments and we insert an empty init
         // range to discard the entire initializes info while intersecting.
         FoundAliasing = true;
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 9953908f240cb9..53f5aea4649cbf 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -176,12 +176,12 @@ declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocaptur
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @large_p1_caller() {
 ; CHECK-LABEL: @large_p1_caller(
-; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[PTR:%.*]] = alloca [200 x i8], align 1
 ; CHECK-NEXT:    call void @large_p1(ptr [[PTR]])
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret i16 [[L]]
 ;
-  %ptr = alloca i16
+  %ptr = alloca [200 x i8]
   call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
   call void @large_p1(ptr %ptr)
   %l = load i16, ptr %ptr
@@ -191,14 +191,14 @@ define i16 @large_p1_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @large_p2_nonalias_caller() {
 ; CHECK-LABEL: @large_p2_nonalias_caller(
-; CHECK-NEXT:    [[PTR1:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[PTR2:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca [200 x i8], align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca [100 x i8], align 1
 ; CHECK-NEXT:    call void @large_p2(ptr [[PTR1]], ptr [[PTR2]])
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR1]], align 2
 ; CHECK-NEXT:    ret i16 [[L]]
 ;
-  %ptr1 = alloca i16
-  %ptr2 = alloca i16
+  %ptr1 = alloca [200 x i8]
+  %ptr2 = alloca [100 x i8]
   call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false)
   call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false)
   call void @large_p2(ptr %ptr1, ptr %ptr2)
@@ -210,17 +210,47 @@ define i16 @large_p2_nonalias_caller() {
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @large_p2_must_alias_caller() {
 ; CHECK-LABEL: @large_p2_must_alias_caller(
-; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[PTR:%.*]] = alloca [300 x i8], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 100
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 42, i64 200, i1 false)
 ; CHECK-NEXT:    call void @large_p2(ptr [[PTR]], ptr [[PTR]])
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret i16 [[L]]
 ;
-  %ptr = alloca i16
+  %ptr = alloca [300 x i8]
   call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)
   call void @large_p2(ptr %ptr, ptr %ptr)
   %l = load i16, ptr %ptr
   ret i16 %l
 }
 
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_may_or_partial_alias_caller1(ptr %base) {
+; CHECK-LABEL: @large_p2_may_or_partial_alias_caller1(
+; CHECK-NEXT:    [[BASEPLUS:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 100
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[BASE]], i8 42, i64 300, i1 false)
+; CHECK-NEXT:    call void @large_p2(ptr [[BASE]], ptr [[BASEPLUS]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[BASE]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+  %baseplus = getelementptr i8, ptr %base, i64 100
+  call void @llvm.memset.p0.i64(ptr %base, i8 42, i64 300, i1 false)
+  call void @large_p2(ptr %base, ptr %baseplus)
+  %l = load i16, ptr %base
+  ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_may_or_partial_alias_caller2(ptr %base1, ptr %base2) {
+; CHECK-LABEL: @large_p2_may_or_partial_alias_caller2(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[BASE1:%.*]], i8 42, i64 300, i1 false)
+; CHECK-NEXT:    call void @large_p2(ptr [[BASE1]], ptr [[BASE2:%.*]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[BASE1]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+  call void @llvm.memset.p0.i64(ptr %base1, i8 42, i64 300, i1 false)
+  call void @large_p2(ptr %base1, ptr %base2)
+  %l = load i16, ptr %base1
+  ret i16 %l
+}
+

>From 634948e5f5d837a67b6770162cc166a9679ec0fd Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Sat, 14 Sep 2024 02:40:19 +0000
Subject: [PATCH 10/15] Call BatchAA.alias() only once then check AAResult

---
 llvm/include/llvm/Analysis/AliasAnalysis.h          | 4 ----
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 6 ++++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index e46c76f4317a5a..1990172116582b 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -667,10 +667,6 @@ class BatchAAResults {
                  MemoryLocation(V2, LocationSize::precise(1))) ==
            AliasResult::MustAlias;
   }
-  bool isNoAlias(const Value *V1, const Value *V2) {
-    return alias(MemoryLocation::getBeforeOrAfter(V1),
-                 MemoryLocation::getBeforeOrAfter(V2)) == AliasResult::NoAlias;
-  }
   ModRefInfo callCapturesBefore(const Instruction *I,
                                 const MemoryLocation &MemLoc,
                                 DominatorTree *DT) {
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 3fad6fccc5d05b..6436a905a0d798 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -910,9 +910,11 @@ getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
     Value *CurArg = CB->getArgOperand(Idx);
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {
-      if (BatchAA.isNoAlias(Arg, CurArg)) {
+      auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
+                               MemoryLocation::getBeforeOrAfter(CurArg));
+      if (AAR == AliasResult::NoAlias) {
         continue;
-      } else if (BatchAA.isMustAlias(Arg, CurArg)) {
+      } else if (AAR == AliasResult::MustAlias) {
         FoundAliasing = true;
         AliasList.push_back(InitInfo);
       } else {

>From 11a9cd9eda29d958470007959a05aa2ff1d29790 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Sat, 5 Oct 2024 21:23:39 +0000
Subject: [PATCH 11/15] Consider isInvisibleToCallerOnUnwind while checking
 whether an argument in CB is dead on unwind

---
 .../Scalar/DeadStoreElimination.cpp           | 155 +++++++++---------
 .../DeadStoreElimination/inter-procedural.ll  |  22 ++-
 2 files changed, 101 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6436a905a0d798..838b7f14c5653e 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -853,7 +853,7 @@ bool hasInitializesAttr(Instruction *I) {
 
 struct ArgumentInitInfo {
   unsigned Idx;
-  bool HasDeadOnUnwindAttr;
+  bool IsDeadOnUnwind;
   ConstantRangeList Inits;
 };
 
@@ -869,15 +869,12 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
   // To address unwind, the function should have nounwind attribute or the
   // arguments have dead_on_unwind attribute. Otherwise, return empty.
   for (const auto &Arg : Args) {
-    if (!CallHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+    if (!CallHasNoUnwindAttr && !Arg.IsDeadOnUnwind)
       return {};
     if (Arg.Inits.empty())
       return {};
   }
 
-  if (Args.size() == 1)
-    return Args[0].Inits;
-
   ConstantRangeList IntersectedIntervals = Args.front().Inits;
   for (auto &Arg : Args.drop_front())
     IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
@@ -885,73 +882,6 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
   return IntersectedIntervals;
 }
 
-// Return the locations written by the initializes attribute.
-// Note that this function considers:
-// 1. Unwind edge: apply "initializes" attribute only if the callee has
-//    "nounwind" attribute or the argument has "dead_on_unwind" attribute.
-// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
-//    the intersected range list of their "initializes" attributes.
-SmallVector<MemoryLocation, 1>
-getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
-  const CallBase *CB = dyn_cast<CallBase>(I);
-  if (!CB)
-    return {};
-
-  // Collect aliasing arguments and their initializes ranges.
-  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
-  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
-    ConstantRangeList Inits;
-    Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
-    if (InitializesAttr.isValid())
-      Inits = InitializesAttr.getValueAsConstantRangeList();
-
-    bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
-    ArgumentInitInfo InitInfo{Idx, HasDeadOnUnwindAttr, Inits};
-    Value *CurArg = CB->getArgOperand(Idx);
-    bool FoundAliasing = false;
-    for (auto &[Arg, AliasList] : Arguments) {
-      auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
-                               MemoryLocation::getBeforeOrAfter(CurArg));
-      if (AAR == AliasResult::NoAlias) {
-        continue;
-      } else if (AAR == AliasResult::MustAlias) {
-        FoundAliasing = true;
-        AliasList.push_back(InitInfo);
-      } else {
-        // For PartialAlias and MayAlias, there is an offset or may be an
-        // unknown offset between the arguments and we insert an empty init
-        // range to discard the entire initializes info while intersecting.
-        FoundAliasing = true;
-        AliasList.push_back(
-            ArgumentInitInfo{Idx, HasDeadOnUnwindAttr, ConstantRangeList()});
-      }
-    }
-    if (!FoundAliasing)
-      Arguments[CurArg] = {InitInfo};
-  }
-
-  SmallVector<MemoryLocation, 1> Locations;
-  for (const auto &[_, Args] : Arguments) {
-    auto IntersectedRanges =
-        getIntersectedInitRangeList(Args, CB->doesNotThrow());
-    if (IntersectedRanges.empty())
-      continue;
-
-    for (const auto &Arg : Args) {
-      for (const auto &Range : IntersectedRanges) {
-        int64_t Start = Range.getLower().getSExtValue();
-        int64_t End = Range.getUpper().getSExtValue();
-        // For now, we only handle locations starting at offset 0.
-        if (Start == 0)
-          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
-                                             LocationSize::precise(End - Start),
-                                             CB->getAAMetadata()));
-      }
-    }
-  }
-  return Locations;
-}
-
 struct DSEState {
   Function &F;
   AliasAnalysis &AA;
@@ -1281,7 +1211,7 @@ struct DSEState {
       Locations.push_back(std::make_pair(*Loc, false));
 
     if (ConsiderInitializesAttr) {
-      for (auto &MemLoc : getInitializesArgMemLoc(I, BatchAA)) {
+      for (auto &MemLoc : getInitializesArgMemLoc(I)) {
         Locations.push_back(std::make_pair(MemLoc, true));
       }
     }
@@ -2312,6 +2242,16 @@ struct DSEState {
     return MadeChange;
   }
 
+  // Return the locations written by the initializes attribute.
+  // Note that this function considers:
+  // 1. Unwind edge: use "initializes" attribute only if the callee has
+  //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
+  //    or the argument is invisble to caller on unwind. That is, we don't
+  //    perform incorrect DSE on unwind edges in the current function.
+  // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+  //    the intersected range list of their "initializes" attributes.
+  SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
+
   // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
   // killed by `KillingLocWrapper.MemDef`. Return whether
   // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
@@ -2323,6 +2263,75 @@ struct DSEState {
   bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
 };
 
+SmallVector<MemoryLocation, 1>
+DSEState::getInitializesArgMemLoc(const Instruction *I) {
+  const CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return {};
+
+  // Collect aliasing arguments and their initializes ranges.
+  SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
+  for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
+    ConstantRangeList Inits;
+    Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
+    if (InitializesAttr.isValid())
+      Inits = InitializesAttr.getValueAsConstantRangeList();
+
+    Value *CurArg = CB->getArgOperand(Idx);
+    // We don't perform incorrect DSE on unwind edges in the current function,
+    // and use the "initialize" attribute to kill dead stores if :
+    // - The call does not throw exceptions, "CB->doesNotThrow()".
+    // - Or the argument has "dead_on_unwind" attribute.
+    // - Or the argument is invisble to caller on unwind, and CB isa<CallInst>
+    // which means no unwind edges.
+    bool IsDeadOnUnwind =
+        CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
+        (isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
+    ArgumentInitInfo InitInfo{Idx, IsDeadOnUnwind, Inits};
+    bool FoundAliasing = false;
+    for (auto &[Arg, AliasList] : Arguments) {
+      auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
+                               MemoryLocation::getBeforeOrAfter(CurArg));
+      if (AAR == AliasResult::NoAlias) {
+        continue;
+      } else if (AAR == AliasResult::MustAlias) {
+        FoundAliasing = true;
+        AliasList.push_back(InitInfo);
+      } else {
+        // For PartialAlias and MayAlias, there is an offset or may be an
+        // unknown offset between the arguments and we insert an empty init
+        // range to discard the entire initializes info while intersecting.
+        FoundAliasing = true;
+        AliasList.push_back(
+            ArgumentInitInfo{Idx, IsDeadOnUnwind, ConstantRangeList()});
+      }
+    }
+    if (!FoundAliasing)
+      Arguments[CurArg] = {InitInfo};
+  }
+
+  SmallVector<MemoryLocation, 1> Locations;
+  for (const auto &[_, Args] : Arguments) {
+    auto IntersectedRanges =
+        getIntersectedInitRangeList(Args, CB->doesNotThrow());
+    if (IntersectedRanges.empty())
+      continue;
+
+    for (const auto &Arg : Args) {
+      for (const auto &Range : IntersectedRanges) {
+        int64_t Start = Range.getLower().getSExtValue();
+        int64_t End = Range.getUpper().getSExtValue();
+        // For now, we only handle locations starting at offset 0.
+        if (Start == 0)
+          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
+                                             LocationSize::precise(End - Start),
+                                             CB->getAAMetadata()));
+      }
+    }
+  }
+  return Locations;
+}
+
 std::pair<bool, bool>
 DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
   bool Changed = false;
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 53f5aea4649cbf..ca6f162cfb9f58 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -138,11 +138,13 @@ define i16 @p2_no_init_alias_caller() {
   ret i16 %l
 }
 
+; Although the 2nd parameter of `p2_no_dead_on_unwind` doesn't have
+; the 'dead_on_unwind' attribute, it's invisble to caller on unwind.
+; DSE still uses the 'initializes' attribute and kill the dead store.
 ; Function Attrs: mustprogress nounwind uwtable
-define i16 @p2_no_dead_on_unwind_alias_caller() {
-; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+define i16 @p2_no_dead_on_unwind_but_invisble_to_caller_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_but_invisble_to_caller_alias_caller(
 ; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    store i16 0, ptr [[PTR]], align 2
 ; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]])
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret i16 [[L]]
@@ -154,6 +156,20 @@ define i16 @p2_no_dead_on_unwind_alias_caller() {
   ret i16 %l
 }
 
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_alias_caller(ptr %ptr) {
+; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+; CHECK-NEXT:    store i16 0, ptr [[PTR:%.*]], align 2
+; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]])
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+  store i16 0, ptr %ptr
+  call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_no_dead_on_unwind_but_nounwind_alias_caller() {
 ; CHECK-LABEL: @p2_no_dead_on_unwind_but_nounwind_alias_caller(

>From 2277de003efce3a410a06ad3d10c40222a039a8b Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Sat, 12 Oct 2024 03:47:08 +0000
Subject: [PATCH 12/15] Update comments and unit tests

---
 .../Scalar/DeadStoreElimination.cpp           | 12 ++++---
 .../DeadStoreElimination/inter-procedural.ll  | 33 +++++++++++++++++--
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 838b7f14c5653e..59c87b6fcbdf7d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2279,11 +2279,11 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) {
 
     Value *CurArg = CB->getArgOperand(Idx);
     // We don't perform incorrect DSE on unwind edges in the current function,
-    // and use the "initialize" attribute to kill dead stores if :
+    // and use the "initializes" attribute to kill dead stores if:
     // - The call does not throw exceptions, "CB->doesNotThrow()".
-    // - Or the argument has "dead_on_unwind" attribute.
-    // - Or the argument is invisble to caller on unwind, and CB isa<CallInst>
-    // which means no unwind edges.
+    // - Or the callee parameter has "dead_on_unwind" attribute.
+    // - Or the argument is invisible to caller on unwind, and CB isa<CallInst>
+    // which means no unwind edges from this call in the current function.
     bool IsDeadOnUnwind =
         CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
         (isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
@@ -2389,6 +2389,10 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
         cast<MemoryDef>(DeadAccess),
         getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
                       /*ConsiderInitializesAttr=*/false));
+    // Note that we don't consider the initializes attribute for DeadAccess.
+    // The dead access would be just a regular write access, like Store
+    // instruction, and its MemoryDefWrapper would only contain one
+    // MemoryLocationWrapper.
     assert(DeadDefWrapper.DefinedLocations.size() == 1);
     MemoryLocationWrapper &DeadLocWrapper =
         DeadDefWrapper.DefinedLocations.front();
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index ca6f162cfb9f58..d93da9b6612b05 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -57,6 +57,35 @@ define i16 @p1_write_then_read_caller_with_clobber() {
   ret i16 %l
 }
 
+declare void @p1_write_then_read_raw(ptr nocapture noundef initializes((0, 2)))
+define i16 @p1_initializes_invoke() personality ptr undef {
+; CHECK-LABEL: @p1_initializes_invoke(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    store i16 0, ptr [[PTR]], align 2
+; CHECK-NEXT:    invoke void @p1_write_then_read_raw(ptr [[PTR]])
+; CHECK-NEXT:            to label [[BB1:%.*]] unwind label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    ret i16 0
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret i16 [[L]]
+;
+entry:
+  %ptr = alloca i16
+  store i16 0, ptr %ptr
+  invoke void @p1_write_then_read_raw(ptr %ptr) to label %bb1 unwind label %bb2
+bb1:
+  ret i16 0
+bb2:
+  %tmp = landingpad { ptr, i32 }
+  cleanup
+  %l = load i16, ptr %ptr
+  ret i16 %l
+}
+
 ; Function Attrs: mustprogress nounwind uwtable
 define i16 @p2_same_range_noalias_caller() {
 ; CHECK-LABEL: @p2_same_range_noalias_caller(
@@ -142,8 +171,8 @@ define i16 @p2_no_init_alias_caller() {
 ; the 'dead_on_unwind' attribute, it's invisble to caller on unwind.
 ; DSE still uses the 'initializes' attribute and kill the dead store.
 ; Function Attrs: mustprogress nounwind uwtable
-define i16 @p2_no_dead_on_unwind_but_invisble_to_caller_alias_caller() {
-; CHECK-LABEL: @p2_no_dead_on_unwind_but_invisble_to_caller_alias_caller(
+define i16 @p2_no_dead_on_unwind_but_invisible_to_caller_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_but_invisible_to_caller_alias_caller(
 ; CHECK-NEXT:    [[PTR:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    call void @p2_no_dead_on_unwind(ptr [[PTR]], ptr [[PTR]])
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[PTR]], align 2

>From 1b8c278d8a00b8e00b0db683ebe47939bac72fc9 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Fri, 18 Oct 2024 03:44:45 +0000
Subject: [PATCH 13/15] Rename IsDeadOnUnwind to IsDeadOrInvisibleOnUnwind

---
 .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 59c87b6fcbdf7d..8ceecec16b594a 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -853,7 +853,7 @@ bool hasInitializesAttr(Instruction *I) {
 
 struct ArgumentInitInfo {
   unsigned Idx;
-  bool IsDeadOnUnwind;
+  bool IsDeadOrInvisibleOnUnwind;
   ConstantRangeList Inits;
 };
 
@@ -867,9 +867,9 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
     return {};
 
   // To address unwind, the function should have nounwind attribute or the
-  // arguments have dead_on_unwind attribute. Otherwise, return empty.
+  // arguments should be dead or invisible on unwind. Otherwise, return empty.
   for (const auto &Arg : Args) {
-    if (!CallHasNoUnwindAttr && !Arg.IsDeadOnUnwind)
+    if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
       return {};
     if (Arg.Inits.empty())
       return {};
@@ -2284,10 +2284,10 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) {
     // - Or the callee parameter has "dead_on_unwind" attribute.
     // - Or the argument is invisible to caller on unwind, and CB isa<CallInst>
     // which means no unwind edges from this call in the current function.
-    bool IsDeadOnUnwind =
+    bool IsDeadOrInvisibleOnUnwind =
         CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
         (isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
-    ArgumentInitInfo InitInfo{Idx, IsDeadOnUnwind, Inits};
+    ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {
       auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
@@ -2302,8 +2302,8 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) {
         // unknown offset between the arguments and we insert an empty init
         // range to discard the entire initializes info while intersecting.
         FoundAliasing = true;
-        AliasList.push_back(
-            ArgumentInitInfo{Idx, IsDeadOnUnwind, ConstantRangeList()});
+        AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
+                                             ConstantRangeList()});
       }
     }
     if (!FoundAliasing)

>From c2db695eefca01cfee839dfbc0dd4ef2456aaf52 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Fri, 18 Oct 2024 03:59:42 +0000
Subject: [PATCH 14/15] Add a TODO about isRemovable()

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8ceecec16b594a..4d2fcacbebc0cd 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2385,14 +2385,14 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
     // We cannot apply the initializes attribute to DeadAccess/DeadDef.
     // It would incorrectly consider a call instruction as redundant store
     // and remove this call instruction.
+    // TODO: this conflates the existence of a MemoryLocation with being able
+    // to delete the instruction. Fix isRemovable() to consider calls with
+    // side effects that cannot be removed, e.g. calls with the initializes
+    // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
     MemoryDefWrapper DeadDefWrapper(
         cast<MemoryDef>(DeadAccess),
         getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
                       /*ConsiderInitializesAttr=*/false));
-    // Note that we don't consider the initializes attribute for DeadAccess.
-    // The dead access would be just a regular write access, like Store
-    // instruction, and its MemoryDefWrapper would only contain one
-    // MemoryLocationWrapper.
     assert(DeadDefWrapper.DefinedLocations.size() == 1);
     MemoryLocationWrapper &DeadLocWrapper =
         DeadDefWrapper.DefinedLocations.front();

>From c855aecaedca21472d9fcd7656bab8aef3f18d3a Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 24 Oct 2024 05:00:16 +0000
Subject: [PATCH 15/15] Update comments

---
 .../Transforms/Scalar/DeadStoreElimination.cpp    | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4d2fcacbebc0cd..1f9c9588659ce1 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -847,8 +847,7 @@ struct MemoryDefWrapper {
 
 bool hasInitializesAttr(Instruction *I) {
   CallBase *CB = dyn_cast<CallBase>(I);
-  return CB != nullptr &&
-         CB->getArgOperandWithAttribute(Attribute::Initializes) != nullptr;
+  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
 }
 
 struct ArgumentInitInfo {
@@ -1196,7 +1195,7 @@ struct DSEState {
     return MemoryLocation::getOrNone(I);
   }
 
-  // Returns a list of <MemoryLocation, bool> pairs wrote by I.
+  // Returns a list of <MemoryLocation, bool> pairs written by I.
   // The bool means whether the write is from Initializes attr.
   SmallVector<std::pair<MemoryLocation, bool>, 1>
   getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
@@ -1666,7 +1665,7 @@ struct DSEState {
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
       // If KillingDef is a CallInst with "initializes" attribute, the reads in
-      // Callee would be dominated by initializations, so this should be safe.
+      // the callee would be dominated by initializations, so it should be safe.
       bool IsKillingDefFromInitAttr = false;
       if (IsInitializesAttrMemLoc) {
         if (KillingI == UseInst &&
@@ -2246,7 +2245,7 @@ struct DSEState {
   // Note that this function considers:
   // 1. Unwind edge: use "initializes" attribute only if the callee has
   //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
-  //    or the argument is invisble to caller on unwind. That is, we don't
+  //    or the argument is invisible to caller on unwind. That is, we don't
   //    perform incorrect DSE on unwind edges in the current function.
   // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
   //    the intersected range list of their "initializes" attributes.
@@ -2282,11 +2281,11 @@ DSEState::getInitializesArgMemLoc(const Instruction *I) {
     // and use the "initializes" attribute to kill dead stores if:
     // - The call does not throw exceptions, "CB->doesNotThrow()".
     // - Or the callee parameter has "dead_on_unwind" attribute.
-    // - Or the argument is invisible to caller on unwind, and CB isa<CallInst>
-    // which means no unwind edges from this call in the current function.
+    // - Or the argument is invisible to caller on unwind, and there are no
+    //   unwind edges from this call in the current function (e.g. `CallInst`).
     bool IsDeadOrInvisibleOnUnwind =
         CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
-        (isInvisibleToCallerOnUnwind(CurArg) && isa<CallInst>(CB));
+        (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
     ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
     bool FoundAliasing = false;
     for (auto &[Arg, AliasList] : Arguments) {



More information about the llvm-commits mailing list