[llvm] [DSE] Apply initializes attribute to DSE (PR #107282)
Haopeng Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 14:01:40 PDT 2024
https://github.com/haopliu updated https://github.com/llvm/llvm-project/pull/107282
>From a94a7341f921de06d65b1491eb5bc1a5e58970fe Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Wed, 4 Sep 2024 18:13:54 +0000
Subject: [PATCH 1/4] Apply initializes attribute to DSE
---
.../Scalar/DeadStoreElimination.cpp | 226 ++++++++++++++----
.../DeadStoreElimination/inter-procedural.ll | 159 ++++++++++++
2 files changed, 343 insertions(+), 42 deletions(-)
create mode 100644 llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a37f295abbd31c..3ccb064adbf0df 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -164,6 +165,10 @@ static cl::opt<bool>
OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Allow DSE to optimize memory accesses."));
+static cl::opt<bool> EnableInitializesImprovement(
+ "enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
+ cl::desc("Enable the initializes attr improvement in DSE"));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
@@ -809,8 +814,10 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
- MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef)
- : MemLoc(MemLoc), MemDef(MemDef) {
+ MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
+ bool DefByInitializesAttr)
+ : MemLoc(MemLoc), MemDef(MemDef),
+ DefByInitializesAttr(DefByInitializesAttr) {
assert(MemLoc.Ptr && "MemLoc should be not null");
UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
DefInst = MemDef->getMemoryInst();
@@ -820,20 +827,121 @@ struct MemoryLocationWrapper {
const Value *UnderlyingObject;
MemoryDef *MemDef;
Instruction *DefInst;
+ bool DefByInitializesAttr = false;
};
// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by this MemoryDef.
struct MemoryDefWrapper {
- MemoryDefWrapper(MemoryDef *MemDef, std::optional<MemoryLocation> MemLoc) {
+ MemoryDefWrapper(
+ MemoryDef *MemDef,
+ const SmallVectorImpl<std::pair<MemoryLocation, bool>> &MemLocations) {
DefInst = MemDef->getMemoryInst();
- if (MemLoc.has_value())
- DefinedLocation = MemoryLocationWrapper(*MemLoc, MemDef);
+ for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
+ DefinedLocations.push_back(
+ MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
}
Instruction *DefInst;
- std::optional<MemoryLocationWrapper> DefinedLocation = std::nullopt;
+ SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
+};
+
+bool HasInitializesAttr(Instruction *I) {
+ CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return false;
+
+ for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+ if (CB->paramHasAttr(Idx, Attribute::Initializes))
+ return true;
+ return false;
+}
+
+struct ArgumentInitInfo {
+ size_t Idx = -1;
+ ConstantRangeList Inits;
+ bool HasDeadOnUnwindAttr = false;
+ bool FuncHasNoUnwindAttr = false;
};
+ConstantRangeList
+GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+ if (Args.empty())
+ return {};
+
+ // To address unwind, the function should have nounwind attribute or the
+ // arguments have dead_on_unwind attribute. Otherwise, return empty.
+ for (const auto &Arg : Args) {
+ if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+ return {};
+ if (Arg.Inits.empty())
+ return {};
+ }
+
+ if (Args.size() == 1)
+ return Args[0].Inits;
+
+ ConstantRangeList MergedIntervals = Args[0].Inits;
+ for (size_t i = 1; i < Args.size(); i++)
+ MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+
+ return MergedIntervals;
+}
+
+// Return the locations wrote by the initializes attribute.
+// Note that this function considers:
+// 1. Unwind edge: apply "initializes" attribute only if the callee has
+// "nounwind" attribute or the argument has "dead_on_unwind" attribute.
+// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
+// the merged range list of their "initializes" attributes.
+SmallVector<MemoryLocation, 1>
+GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+ const CallBase *CB = dyn_cast<CallBase>(I);
+ if (!CB)
+ return {};
+
+ // Collect aliasing arguments and their initializes ranges.
+ bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
+ SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
+ for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+ ConstantRangeList Inits;
+ if (CB->paramHasAttr(Idx, Attribute::Initializes))
+ Inits = CB->getParamAttr(Idx, Attribute::Initializes)
+ .getValueAsConstantRangeList();
+
+ bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
+ ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+ Value *CurArg = CB->getArgOperand(Idx);
+ bool FoundAliasing = false;
+ for (auto &[Arg, AliasList] : Arguments) {
+ if (BatchAA.isMustAlias(Arg, CurArg)) {
+ FoundAliasing = true;
+ AliasList.push_back(InitInfo);
+ }
+ }
+ if (!FoundAliasing)
+ Arguments[CurArg] = {InitInfo};
+ }
+
+ SmallVector<MemoryLocation, 1> Locations;
+ for (const auto &[_, Args] : Arguments) {
+ auto MergedInitAttr = GetMergedInitAttr(Args);
+ if (MergedInitAttr.empty())
+ continue;
+
+ for (const auto &Arg : Args) {
+ for (const auto &Range : MergedInitAttr) {
+ int64_t Start = Range.getLower().getSExtValue();
+ int64_t End = Range.getUpper().getSExtValue();
+ if (Start == 0)
+ Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
+ LocationSize::precise(End - Start),
+ CB->getAAMetadata()));
+ }
+ }
+ }
+ return Locations;
+}
+
struct DSEState {
Function &F;
AliasAnalysis &AA;
@@ -911,7 +1019,8 @@ struct DSEState {
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (getLocForWrite(&I) || isMemTerminatorInst(&I)))
+ (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
+ HasInitializesAttr(&I)))
MemDefs.push_back(MD);
}
}
@@ -1147,13 +1256,26 @@ struct DSEState {
return MemoryLocation::getOrNone(I);
}
- std::optional<MemoryLocation> getLocForInst(Instruction *I) {
+ // Returns a list of <MemoryLocation, bool> pairs written by I.
+ // The bool is true when the write comes from an "initializes" attribute.
+ SmallVector<std::pair<MemoryLocation, bool>, 1>
+ getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
+ SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
if (isMemTerminatorInst(I)) {
- if (auto Loc = getLocForTerminator(I)) {
- return Loc->first;
+ if (auto Loc = getLocForTerminator(I))
+ Locations.push_back(std::make_pair(Loc->first, false));
+ return Locations;
+ }
+
+ if (auto Loc = getLocForWrite(I))
+ Locations.push_back(std::make_pair(*Loc, false));
+
+ if (ConsiderInitializesAttr) {
+ for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+ Locations.push_back(std::make_pair(MemLoc, true));
}
}
- return getLocForWrite(I);
+ return Locations;
}
/// Assuming this instruction has a dead analyzable write, can we delete
@@ -1365,7 +1487,8 @@ struct DSEState {
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
- bool IsMemTerm, unsigned &PartialLimit) {
+ bool IsMemTerm, unsigned &PartialLimit,
+ bool IsInitializesAttrMemLoc) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return std::nullopt;
@@ -1602,7 +1725,17 @@ struct DSEState {
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(MaybeDeadLoc, UseInst)) {
+ // If KillingDef is a call with an "initializes" attribute, reads in the
+ // callee are dominated by its own initializing writes, so this is safe.
+ bool IsKillingDefFromInitAttr = false;
+ if (IsInitializesAttrMemLoc) {
+ if (KillingI == UseInst &&
+ KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr)) {
+ IsKillingDefFromInitAttr = true;
+ }
+ }
+
+ if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return std::nullopt;
}
@@ -2207,7 +2340,8 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
- isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit);
+ isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
+ KillingLocWrapper.DefByInitializesAttr);
if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
@@ -2232,8 +2366,11 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
}
MemoryDefWrapper DeadDefWrapper(
cast<MemoryDef>(DeadAccess),
- getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst()));
- MemoryLocationWrapper &DeadLocWrapper = *DeadDefWrapper.DefinedLocation;
+ getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
+ /*ConsiderInitializesAttr=*/false));
+ assert(DeadDefWrapper.DefinedLocations.size() == 1);
+ MemoryLocationWrapper &DeadLocWrapper =
+ DeadDefWrapper.DefinedLocations.front();
LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
NumGetDomMemoryDefPassed++;
@@ -2311,37 +2448,41 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
}
bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
- if (!KillingDefWrapper.DefinedLocation.has_value()) {
+ if (KillingDefWrapper.DefinedLocations.empty()) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
<< *KillingDefWrapper.DefInst << "\n");
return false;
}
- auto &KillingLocWrapper = *KillingDefWrapper.DefinedLocation;
- LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *KillingLocWrapper.MemDef << " ("
- << *KillingLocWrapper.DefInst << ")\n");
- auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
-
- // Check if the store is a no-op.
- if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
- KillingLocWrapper.UnderlyingObject)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
- << *KillingLocWrapper.DefInst << '\n');
- deleteDeadInstruction(KillingLocWrapper.DefInst);
- NumRedundantStores++;
- return true;
- }
- // Can we form a calloc from a memset/malloc pair?
- if (!DeletedKillingLoc &&
- tryFoldIntoCalloc(KillingLocWrapper.MemDef,
- KillingLocWrapper.UnderlyingObject)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
- << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
- deleteDeadInstruction(KillingLocWrapper.DefInst);
- return true;
+ bool MadeChange = false;
+ for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
+ << *KillingLocWrapper.MemDef << " ("
+ << *KillingLocWrapper.DefInst << ")\n");
+ auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
+
+ // Check if the store is a no-op.
+ if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
+ KillingLocWrapper.UnderlyingObject)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
+ << *KillingLocWrapper.DefInst << '\n');
+ deleteDeadInstruction(KillingLocWrapper.DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ continue;
+ }
+ // Can we form a calloc from a memset/malloc pair?
+ if (!DeletedKillingLoc &&
+ tryFoldIntoCalloc(KillingLocWrapper.MemDef,
+ KillingLocWrapper.UnderlyingObject)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
+ << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
+ deleteDeadInstruction(KillingLocWrapper.DefInst);
+ MadeChange = true;
+ continue;
+ }
}
- return Changed;
+ return MadeChange;
}
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -2357,7 +2498,8 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
continue;
MemoryDefWrapper KillingDefWrapper(
- KillingDef, State.getLocForInst(KillingDef->getMemoryInst()));
+ KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
+ EnableInitializesImprovement));
MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
new file mode 100644
index 00000000000000..c4ff69af9051bc
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+
+declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
+declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
+declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
+declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_only_caller() {
+; CHECK-LABEL: @p1_write_only_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p1_write_only(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p1_write_only(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p1_write_then_read_caller() {
+; CHECK-LABEL: @p1_write_then_read_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p1_write_then_read(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p1_write_then_read(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_nonalias_caller() {
+; CHECK-LABEL: @p2_same_range_nonalias_caller(
+; CHECK-NEXT: %ptr1 = alloca i16, align 2
+; CHECK-NEXT: %ptr2 = alloca i16, align 2
+; CHECK-NEXT: call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT: %l = load i16, ptr %ptr1
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr1 = alloca i16
+ %ptr2 = alloca i16
+ store i16 0, ptr %ptr1
+ store i16 0, ptr %ptr2
+ call void @p2_same_range(ptr %ptr1, ptr %ptr2)
+ %l = load i16, ptr %ptr1
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_same_range_alias_caller() {
+; CHECK-LABEL: @p2_same_range_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p2_same_range(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_same_range(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_init_alias_caller() {
+; CHECK-LABEL: @p2_no_init_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: store i16 0, ptr %ptr
+; CHECK-NEXT: call void @p2_no_init(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_no_init(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: store i16 0, ptr %ptr
+; CHECK-NEXT: call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_no_dead_on_unwind(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
+declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p1_caller() {
+; CHECK-LABEL: @large_p1_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @large_p1(ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 100, i1 false)
+ call void @large_p1(ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_nonalias_caller() {
+; CHECK-LABEL: @large_p2_nonalias_caller(
+; CHECK-NEXT: %ptr1 = alloca i16, align 2
+; CHECK-NEXT: %ptr2 = alloca i16, align 2
+; CHECK-NEXT: call void @large_p2(ptr %ptr1, ptr %ptr2)
+; CHECK-NEXT: %l = load i16, ptr %ptr1
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr1 = alloca i16
+ %ptr2 = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr1, i8 42, i64 200, i1 false)
+ call void @llvm.memset.p0.i64(ptr %ptr2, i8 42, i64 100, i1 false)
+ call void @large_p2(ptr %ptr1, ptr %ptr2)
+ %l = load i16, ptr %ptr1
+ ret i16 %l
+}
+
+
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @large_p2_alias_caller() {
+; CHECK-LABEL: @large_p2_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: %1 = getelementptr inbounds i8, ptr %ptr, i64 100
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 %1, i8 42, i64 200, i1 false)
+; CHECK-NEXT: call void @large_p2(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ call void @llvm.memset.p0.i64(ptr %ptr, i8 42, i64 300, i1 false)
+ call void @large_p2(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
>From 002d984bef56e88556405a94ecec3c9e2bb0d846 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 04:56:11 +0000
Subject: [PATCH 2/4] Update comments and unit test
---
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++++
llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 3ccb064adbf0df..fd178bc7238c54 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -165,6 +165,7 @@ static cl::opt<bool>
OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Allow DSE to optimize memory accesses."));
+// TODO: turn on and remove this flag.
static cl::opt<bool> EnableInitializesImprovement(
"enable-dse-initializes-attr-improvement", cl::init(false), cl::Hidden,
cl::desc("Enable the initializes attr improvement in DSE"));
@@ -2364,6 +2365,9 @@ DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
}
continue;
}
+ // We cannot apply the initializes attribute to DeadAccess/DeadDef.
+ // Doing so would incorrectly treat a call instruction as a redundant
+ // store and remove the call.
MemoryDefWrapper DeadDefWrapper(
cast<MemoryDef>(DeadAccess),
getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index c4ff69af9051bc..6975670379faf9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=function-attrs,dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
+; RUN: opt < %s -passes=dse -enable-dse-initializes-attr-improvement -S | FileCheck %s
declare void @p1_write_only(ptr nocapture noundef writeonly initializes((0, 2)) dead_on_unwind)
declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
>From eed0dff3942831e0bf1a873bf691f0b4bcae268c Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 20:52:25 +0000
Subject: [PATCH 3/4] Fix loop format, index type, and clean up
ArgumentInitInfo struct members
---
.../Scalar/DeadStoreElimination.cpp | 47 ++++++++++---------
.../DeadStoreElimination/inter-procedural.ll | 16 +++++++
2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fd178bc7238c54..cf827417df191d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -846,33 +846,37 @@ struct MemoryDefWrapper {
SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};
-bool HasInitializesAttr(Instruction *I) {
+bool hasInitializesAttr(Instruction *I) {
CallBase *CB = dyn_cast<CallBase>(I);
if (!CB)
return false;
- for (size_t Idx = 0; Idx < CB->arg_size(); Idx++)
+ for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx)
if (CB->paramHasAttr(Idx, Attribute::Initializes))
return true;
return false;
}
struct ArgumentInitInfo {
- size_t Idx = -1;
+ unsigned Idx;
+ bool HasDeadOnUnwindAttr;
ConstantRangeList Inits;
- bool HasDeadOnUnwindAttr = false;
- bool FuncHasNoUnwindAttr = false;
};
+// Return the intersected range list of the initializes attributes of "Args".
+// "Args" are call arguments that alias to each other.
+// If any argument in "Args" doesn't have dead_on_unwind attr and
+// "FuncHasNoUnwindAttr" is false, return empty.
ConstantRangeList
-GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
+getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
+ bool FuncHasNoUnwindAttr) {
if (Args.empty())
return {};
// To address unwind, the function should have nounwind attribute or the
// arguments have dead_on_unwind attribute. Otherwise, return empty.
for (const auto &Arg : Args) {
- if (!Arg.FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
+ if (!FuncHasNoUnwindAttr && !Arg.HasDeadOnUnwindAttr)
return {};
if (Arg.Inits.empty())
return {};
@@ -881,11 +885,11 @@ GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
if (Args.size() == 1)
return Args[0].Inits;
- ConstantRangeList MergedIntervals = Args[0].Inits;
- for (size_t i = 1; i < Args.size(); i++)
- MergedIntervals = MergedIntervals.intersectWith(Args[i].Inits);
+ ConstantRangeList IntersectedIntervals = Args[0].Inits;
+ for (unsigned I = 1, Count = Args.size(); I < Count; ++I)
+ IntersectedIntervals = IntersectedIntervals.intersectWith(Args[I].Inits);
- return MergedIntervals;
+ return IntersectedIntervals;
}
// Return the locations wrote by the initializes attribute.
@@ -893,24 +897,23 @@ GetMergedInitAttr(const SmallVectorImpl<ArgumentInitInfo> &Args) {
// 1. Unwind edge: apply "initializes" attribute only if the callee has
// "nounwind" attribute or the argument has "dead_on_unwind" attribute.
// 2. Argument alias: for aliasing arguments, the "initializes" attribute is
-// the merged range list of their "initializes" attributes.
+// the intersected range list of their "initializes" attributes.
SmallVector<MemoryLocation, 1>
-GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
+getInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
const CallBase *CB = dyn_cast<CallBase>(I);
if (!CB)
return {};
// Collect aliasing arguments and their initializes ranges.
- bool HasNoUnwindAttr = CB->hasFnAttr(Attribute::NoUnwind);
SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
- for (size_t Idx = 0; Idx < CB->arg_size(); Idx++) {
+ for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
ConstantRangeList Inits;
if (CB->paramHasAttr(Idx, Attribute::Initializes))
Inits = CB->getParamAttr(Idx, Attribute::Initializes)
.getValueAsConstantRangeList();
bool HasDeadOnUnwindAttr = CB->paramHasAttr(Idx, Attribute::DeadOnUnwind);
- ArgumentInitInfo InitInfo{Idx, Inits, HasDeadOnUnwindAttr, HasNoUnwindAttr};
+ ArgumentInitInfo InitInfo{Idx, HasDeadOnUnwindAttr, Inits};
Value *CurArg = CB->getArgOperand(Idx);
bool FoundAliasing = false;
for (auto &[Arg, AliasList] : Arguments) {
@@ -925,14 +928,16 @@ GetInitializesArgMemLoc(const Instruction *I, BatchAAResults &BatchAA) {
SmallVector<MemoryLocation, 1> Locations;
for (const auto &[_, Args] : Arguments) {
- auto MergedInitAttr = GetMergedInitAttr(Args);
- if (MergedInitAttr.empty())
+ auto IntersectedRanges =
+ getIntersectedInitRangeList(Args, CB->hasFnAttr(Attribute::NoUnwind));
+ if (IntersectedRanges.empty())
continue;
for (const auto &Arg : Args) {
- for (const auto &Range : MergedInitAttr) {
+ for (const auto &Range : IntersectedRanges) {
int64_t Start = Range.getLower().getSExtValue();
int64_t End = Range.getUpper().getSExtValue();
+ // For now, we only handle locations starting at offset 0.
if (Start == 0)
Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
LocationSize::precise(End - Start),
@@ -1021,7 +1026,7 @@ struct DSEState {
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
(getLocForWrite(&I) || isMemTerminatorInst(&I) ||
- HasInitializesAttr(&I)))
+ (EnableInitializesImprovement && hasInitializesAttr(&I))))
MemDefs.push_back(MD);
}
}
@@ -1272,7 +1277,7 @@ struct DSEState {
Locations.push_back(std::make_pair(*Loc, false));
if (ConsiderInitializesAttr) {
- for (auto &MemLoc : GetInitializesArgMemLoc(I, BatchAA)) {
+ for (auto &MemLoc : getInitializesArgMemLoc(I, BatchAA)) {
Locations.push_back(std::make_pair(MemLoc, true));
}
}
diff --git a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
index 6975670379faf9..00c9633f123b98 100644
--- a/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/inter-procedural.ll
@@ -6,6 +6,7 @@ declare void @p1_write_then_read(ptr nocapture noundef initializes((0, 2)) dead_
declare void @p2_same_range(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)) dead_on_unwind)
declare void @p2_no_init(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef dead_on_unwind)
declare void @p2_no_dead_on_unwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2)))
+declare void @p2_no_dead_on_unwind_but_nounwind(ptr nocapture noundef initializes((0, 2)) dead_on_unwind, ptr nocapture noundef initializes((0, 2))) nounwind
; Function Attrs: mustprogress nounwind uwtable
define i16 @p1_write_only_caller() {
@@ -102,6 +103,21 @@ define i16 @p2_no_dead_on_unwind_alias_caller() {
ret i16 %l
}
+; Function Attrs: mustprogress nounwind uwtable
+define i16 @p2_no_dead_on_unwind_but_nounwind_alias_caller() {
+; CHECK-LABEL: @p2_no_dead_on_unwind_but_nounwind_alias_caller(
+; CHECK-NEXT: %ptr = alloca i16, align 2
+; CHECK-NEXT: call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr)
+; CHECK-NEXT: %l = load i16, ptr %ptr
+; CHECK-NEXT: ret i16 %l
+;
+ %ptr = alloca i16
+ store i16 0, ptr %ptr
+ call void @p2_no_dead_on_unwind_but_nounwind(ptr %ptr, ptr %ptr)
+ %l = load i16, ptr %ptr
+ ret i16 %l
+}
+
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @large_p1(ptr nocapture noundef initializes((0, 200))) nounwind
declare void @large_p2(ptr nocapture noundef initializes((0, 200)), ptr nocapture noundef initializes((0, 100))) nounwind
>From e8163c9c87197a5b4aadae37d3dbfc3c621c52c2 Mon Sep 17 00:00:00 2001
From: Haopeng Liu <haopliu at google.com>
Date: Thu, 5 Sep 2024 21:01:26 +0000
Subject: [PATCH 4/4] Change 'Count = Args.size()' to size_t type
---
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index cf827417df191d..caf4d97086bdca 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -886,7 +886,7 @@ getIntersectedInitRangeList(const SmallVectorImpl<ArgumentInitInfo> &Args,
return Args[0].Inits;
ConstantRangeList IntersectedIntervals = Args[0].Inits;
- for (unsigned I = 1, Count = Args.size(); I < Count; ++I)
+ for (size_t I = 1, Count = Args.size(); I < Count; ++I)
IntersectedIntervals = IntersectedIntervals.intersectWith(Args[I].Inits);
return IntersectedIntervals;
More information about the llvm-commits
mailing list