[llvm] a947419 - [SROA] Improve handling of lifetimes in load-only promotion (#135382)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 02:52:45 PDT 2025
Author: Nikita Popov
Date: 2025-04-14T11:52:42+02:00
New Revision: a9474191e059edbb9b25c769afbe52c7a823f359
URL: https://github.com/llvm/llvm-project/commit/a9474191e059edbb9b25c769afbe52c7a823f359
DIFF: https://github.com/llvm/llvm-project/commit/a9474191e059edbb9b25c769afbe52c7a823f359.diff
LOG: [SROA] Improve handling of lifetimes in load-only promotion (#135382)
The propagateStoredValuesToLoads() transform currently bails out when a
lifetime intrinsic spans the whole alloca while the individual loads/stores
operate on only a smaller part of it, because the range of the lifetime
slice does not match the range of the load/store slices in the partition.
Fix this by ignoring assume-like slices early, regardless of which range
they cover.
I've changed the overall code structure here a bit because I was getting
confused by the different iterators.
Added:
Modified:
llvm/lib/Transforms/Scalar/SROA.cpp
llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
llvm/test/Transforms/SROA/readonlynocapture.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 4e444d8d4cefc..7d49b63a8e4f6 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -5498,45 +5498,14 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
// that do not overlap with any before them. The slices are sorted by
// increasing beginOffset. We don't use AS.partitions(), as it will use a more
// sophisticated algorithm that takes splittable slices into account.
- auto PartitionBegin = AS.begin();
- auto PartitionEnd = PartitionBegin;
- uint64_t BeginOffset = PartitionBegin->beginOffset();
- uint64_t EndOffset = PartitionBegin->endOffset();
- while (PartitionBegin != AS.end()) {
- bool AllSameAndValid = true;
- SmallVector<Instruction *> Insts;
- Type *PartitionType = nullptr;
- while (PartitionEnd != AS.end() &&
- (PartitionEnd->beginOffset() < EndOffset ||
- PartitionEnd->endOffset() <= EndOffset)) {
- if (AllSameAndValid) {
- AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
- PartitionEnd->endOffset() == EndOffset;
- Instruction *User =
- cast<Instruction>(PartitionEnd->getUse()->getUser());
- if (auto *LI = dyn_cast<LoadInst>(User)) {
- Type *UserTy = LI->getType();
- // LoadAndStorePromoter requires all the types to be the same.
- if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
- AllSameAndValid = false;
- PartitionType = UserTy;
- Insts.push_back(User);
- } else if (auto *SI = dyn_cast<StoreInst>(User)) {
- Type *UserTy = SI->getValueOperand()->getType();
- if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
- AllSameAndValid = false;
- PartitionType = UserTy;
- Insts.push_back(User);
- } else if (!isAssumeLikeIntrinsic(User)) {
- AllSameAndValid = false;
- }
- }
- EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
- ++PartitionEnd;
- }
+ LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
+ bool AllSameAndValid = true;
+ Type *PartitionType = nullptr;
+ SmallVector<Instruction *> Insts;
+ uint64_t BeginOffset = 0;
+ uint64_t EndOffset = 0;
- // So long as all the slices start and end offsets matched, update loads to
- // the values stored in the partition.
+ auto Flush = [&]() {
if (AllSameAndValid && !Insts.empty()) {
LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
<< EndOffset << ")\n");
@@ -5546,14 +5515,56 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
Promoter.run(Insts);
}
+ AllSameAndValid = true;
+ PartitionType = nullptr;
+ Insts.clear();
+ };
- // Step on to the next partition.
- PartitionBegin = PartitionEnd;
- if (PartitionBegin == AS.end())
- break;
- BeginOffset = PartitionBegin->beginOffset();
- EndOffset = PartitionBegin->endOffset();
+ for (Slice &S : AS) {
+ auto *User = cast<Instruction>(S.getUse()->getUser());
+ if (isAssumeLikeIntrinsic(User)) {
+ LLVM_DEBUG({
+ dbgs() << "Ignoring slice: ";
+ AS.print(dbgs(), &S);
+ });
+ continue;
+ }
+ if (S.beginOffset() >= EndOffset) {
+ Flush();
+ BeginOffset = S.beginOffset();
+ EndOffset = S.endOffset();
+ } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
+ if (AllSameAndValid) {
+ LLVM_DEBUG({
+ dbgs() << "Slice does not match range [" << BeginOffset << ", "
+ << EndOffset << ")";
+ AS.print(dbgs(), &S);
+ });
+ AllSameAndValid = false;
+ }
+ EndOffset = std::max(EndOffset, S.endOffset());
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(User)) {
+ Type *UserTy = LI->getType();
+ // LoadAndStorePromoter requires all the types to be the same.
+ if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
+ AllSameAndValid = false;
+ PartitionType = UserTy;
+ Insts.push_back(User);
+ } else if (auto *SI = dyn_cast<StoreInst>(User)) {
+ Type *UserTy = SI->getValueOperand()->getType();
+ if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
+ AllSameAndValid = false;
+ PartitionType = UserTy;
+ Insts.push_back(User);
+ } else {
+ AllSameAndValid = false;
+ }
}
+
+ Flush();
return true;
}
diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index a37f02df46c75..13808b2aa8916 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -803,8 +803,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
; CHECK: if.end:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
-; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[C]], align 4
-; CHECK-NEXT: ret i64 [[LD]]
+; CHECK-NEXT: ret i64 0
;
entry:
%c = alloca i64, align 2
diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
index 2ad20fcc51dc5..1cbfe436f9591 100644
--- a/llvm/test/Transforms/SROA/readonlynocapture.ll
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -456,4 +456,47 @@ define i32 @provenance_only_capture() {
ret i32 %l1
}
+define i32 @simple_with_lifetimes() {
+; CHECK-LABEL: @simple_with_lifetimes(
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A]])
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A]])
+; CHECK-NEXT: ret i32 0
+;
+ %a = alloca i32
+ call void @llvm.lifetime.start(i64 4, ptr %a)
+ store i32 0, ptr %a
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ call void @llvm.lifetime.end(i64 4, ptr %a)
+ ret i32 %l1
+}
+
+define i32 @twoalloc_with_lifetimes() {
+; CHECK-LABEL: @twoalloc_with_lifetimes(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[A]])
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[R:%.*]] = add i32 0, 1
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %a = alloca {i32, i32}
+ call void @llvm.lifetime.start(i64 8, ptr %a)
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ %l2 = load i32, ptr %b
+ %r = add i32 %l1, %l2
+ call void @llvm.lifetime.end(i64 8, ptr %a)
+ ret i32 %r
+}
+
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
More information about the llvm-commits
mailing list