[llvm] [SROA] Optimize reloaded values in allocas that escape into readonly nocapture calls. (PR #116645)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 18 08:11:11 PST 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/116645
Given an alloca that potentially has many uses in complex code and escapes into a call that is readonly+nocapture, we cannot easily split up the alloca. There are several optimizations that will attempt to take a value that is stored and a reload, and replace the load with the original stored value. Instcombine has some simple heuristics, GVN can sometimes do it, as can CSE in limited situations. They all suffer from the same issue with complex code - they start from a load/store and need to prove no-alias for all code between, which in complex cases might be a lot to look through. Especially if the ptr is an alloca with many uses that is over the normal escape capture limits.
The pass that does do well with allocas is SROA, as it has a complete view of all of its uses. This patch adds a case to SROA where it can detect allocas that are passed into calls that are no-capture readonly. It can then optimize the reloaded values inside the alloca slice with the stored value knowing that it is valid no matter the location of the loads/stores from the no-escaping nature of the alloca.
>From 8eb51dd5d83bb1f3269a7f523a3f95e85214c96f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Nov 2024 13:58:31 +0000
Subject: [PATCH 1/2] [SROA] Escaping readonly nocapture tests. NFC
---
.../SROA/non-capturing-call-readonly.ll | 8 +-
.../test/Transforms/SROA/readonlynocapture.ll | 204 ++++++++++++++++++
2 files changed, 208 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/SROA/readonlynocapture.ll
diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index 3756afadbf884a..87862b929a7511 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -364,12 +364,12 @@ define i32 @alloca_used_in_maybe_throwing_call(ptr %data, i64 %n) personality pt
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr [[RETVAL]])
-; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[UW:%.*]]
+; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[UW:%.*]]
; CHECK: cont:
; CHECK-NEXT: br label [[END:%.*]]
; CHECK: uw:
; CHECK-NEXT: [[I1:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: catch ptr null
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4
@@ -424,10 +424,10 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(ptr %data, i64 %n
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr [[RETVAL]])
-; CHECK-NEXT: to label [[END:%.*]] unwind label [[UW:%.*]]
+; CHECK-NEXT: to label [[END:%.*]] unwind label [[UW:%.*]]
; CHECK: uw:
; CHECK-NEXT: [[I1:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: catch ptr null
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4
diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
new file mode 100644
index 00000000000000..0cc93bccc6e9d9
--- /dev/null
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
+
+declare void @callee(ptr nocapture readonly %p)
+
+define i32 @simple() {
+; CHECK-LABEL: @simple(
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: ret i32 [[L1]]
+;
+ %a = alloca i32
+ store i32 0, ptr %a
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ ret i32 %l1
+}
+
+define i32 @twoalloc() {
+; CHECK-LABEL: @twoalloc(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ %l2 = load i32, ptr %b
+ ret i32 %l2
+}
+
+define i32 @twostore() {
+; CHECK-LABEL: @twostore(
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: store i32 2, ptr [[A]], align 4
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: ret i32 [[L]]
+;
+ %a = alloca i32
+ store i32 1, ptr %a
+ call void @callee(ptr %a)
+ store i32 2, ptr %a
+ %l = load i32, ptr %a
+ ret i32 %l
+}
+
+define i32 @twoalloc_store64(i64 %x) {
+; CHECK-LABEL: @twoalloc_store64(
+; CHECK-NEXT: [[A:%.*]] = alloca i64, align 8
+; CHECK-NEXT: store i64 [[X:%.*]], ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca i64
+ store i64 %x, ptr %a
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ %l2 = load i32, ptr %b
+ ret i32 %l2
+}
+
+define i32 @twocalls() {
+; CHECK-LABEL: @twocalls(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee(ptr %a)
+ %l1 = load i32, ptr %a
+ call void @callee(ptr %a)
+ %l2 = load i32, ptr %b
+ ret i32 %l2
+}
+
+define i32 @volatile() {
+; CHECK-LABEL: @volatile(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load volatile i32, ptr [[A]], align 4
+; CHECK-NEXT: [[L2:%.*]] = load volatile i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee(ptr %a)
+ %l1 = load volatile i32, ptr %a
+ %l2 = load volatile i32, ptr %b
+ ret i32 %l2
+}
+
+define i32 @notdominating() {
+; CHECK-LABEL: @notdominating(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: [[L1:%.*]] = load volatile i32, ptr [[A]], align 4
+; CHECK-NEXT: [[L2:%.*]] = load volatile i32, ptr [[B]], align 4
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ %b = getelementptr i32, ptr %a, i32 1
+ %l1 = load volatile i32, ptr %a
+ %l2 = load volatile i32, ptr %b
+ store i32 0, ptr %a
+ store i32 1, ptr %b
+ call void @callee(ptr %a)
+ ret i32 %l2
+}
+
+declare void @callee_notreadonly(ptr %p)
+define i32 @notreadonly() {
+; CHECK-LABEL: @notreadonly(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee_notreadonly(ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee_notreadonly(ptr %a)
+ %l1 = load i32, ptr %a
+ %l2 = load i32, ptr %b
+ ret i32 %l2
+}
+
+declare void @callee_multiuse(ptr nocapture readonly %p, ptr nocapture readonly %q)
+define i32 @multiuse() {
+; CHECK-LABEL: @multiuse(
+; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT: store i32 1, ptr [[B]], align 4
+; CHECK-NEXT: call void @callee_multiuse(ptr [[A]], ptr [[A]])
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: ret i32 [[L2]]
+;
+ %a = alloca {i32, i32}
+ store i32 0, ptr %a
+ %b = getelementptr i32, ptr %a, i32 1
+ store i32 1, ptr %b
+ call void @callee_multiuse(ptr %a, ptr %a)
+ %l1 = load i32, ptr %a
+ %l2 = load i32, ptr %b
+ ret i32 %l2
+}
+
+define i32 @memcpyed(ptr %src) {
+; CHECK-LABEL: @memcpyed(
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: call void @callee(ptr [[A]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr [[SRC:%.*]], i64 4, i1 false)
+; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: ret i32 [[L1]]
+;
+ %a = alloca i32
+ store i32 0, ptr %a
+ call void @callee(ptr %a)
+ call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %src, i64 4, i1 false)
+ %l1 = load i32, ptr %a
+ ret i32 %l1
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
>From 0f1db00691f57306829072f2864846ce032878bd Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Nov 2024 15:58:22 +0000
Subject: [PATCH 2/2] [SROA] Optimize reloaded values in allocas that escape
into readonly nocapture calls.
Given an alloca that potentially has many uses in complex code and escapes into
a call that is readonly+nocapture, we cannot easily split up the alloca. There
are several optimizations that will attempt to take a value that is stored and
a reload, and replace the load with the original stored value. Instcombine has
some simple heuristics, GVN can sometimes do it, as can early CSE in limited
situations. They all suffer from the same issue with complex code - they start
from a load/store and need to prove no-alias for all code between, which in
complex cases might be a lot to look through. Especially if the ptr is an
alloca with many uses that is over the normal escape capture limits.
The pass that does do well with allocas is SROA, as it has a complete view of
the alloca and all of its uses. This patch adds a case to SROA where it can
detect allocas that are passed into calls that are no-capture readonly. It can
then optimize the reloaded values inside the alloca slice with the stored
value knowing that it is valid no matter the location of the loads/stores from
the no-escaping nature of the alloca.
---
llvm/include/llvm/Analysis/PtrUseVisitor.h | 14 ++++
llvm/lib/Transforms/Scalar/SROA.cpp | 80 ++++++++++++++++++-
.../SROA/non-capturing-call-readonly.ll | 12 +--
.../test/Transforms/SROA/readonlynocapture.ll | 10 +--
4 files changed, 104 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index bbe2741f44fc3d..c9d3874e7dd961 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -64,6 +64,9 @@ class PtrUseVisitorBase {
/// Is the pointer escaped at some point?
bool isEscaped() const { return EscapedInfo != nullptr; }
+ /// Is the pointer escaped into a read-only nocapture call at some point?
+ bool isEscapedReadOnly() const { return EscapedReadOnly != nullptr; }
+
/// Get the instruction causing the visit to abort.
/// \returns a pointer to the instruction causing the abort if one is
/// available; otherwise returns null.
@@ -74,6 +77,10 @@ class PtrUseVisitorBase {
/// is available; otherwise returns null.
Instruction *getEscapingInst() const { return EscapedInfo; }
+ /// Get the instruction causing the pointer to escape which is a read-only
+ /// nocapture call.
+ Instruction *getEscapedReadOnlyInst() const { return EscapedReadOnly; }
+
/// Mark the visit as aborted. Intended for use in a void return.
/// \param I The instruction which caused the visit to abort, if available.
void setAborted(Instruction *I) {
@@ -88,6 +95,12 @@ class PtrUseVisitorBase {
EscapedInfo = I;
}
+ /// Mark the pointer as escaped into a readonly-nocapture call.
+ void setEscapedReadOnly(Instruction *I) {
+ assert(I && "Expected a valid pointer in setEscapedReadOnly");
+ EscapedReadOnly = I;
+ }
+
/// Mark the pointer as escaped, and the visit as aborted. Intended
/// for use in a void return.
/// \param I The instruction which both escapes the pointer and aborts the
@@ -100,6 +113,7 @@ class PtrUseVisitorBase {
private:
Instruction *AbortedInfo = nullptr;
Instruction *EscapedInfo = nullptr;
+ Instruction *EscapedReadOnly = nullptr;
};
protected:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d80af26451ac75..1404ab58fc829b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -43,6 +43,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -246,6 +247,7 @@ class SROA {
bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
+ bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
@@ -598,6 +600,7 @@ class AllocaSlices {
/// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
+ bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
/// Support for iterating over the slices.
/// @{
@@ -680,6 +683,7 @@ class AllocaSlices {
/// store a pointer to that here and abort trying to form slices of the
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
+ Instruction *PointerEscapingInstrReadOnly;
/// The slices of the alloca.
///
@@ -1390,6 +1394,23 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
/// Disable SROA entirely if there are unhandled users of the alloca.
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
+
+ void visitCallBase(CallBase &CB) {
+ // For a direct call with matching arity and no operand bundles: if every
+ // argument equal to *U is nocapture + readonly, mark it EscapedReadOnly.
+ Function *Callee = CB.getCalledFunction();
+ if (Callee && CB.arg_size() == Callee->arg_size() &&
+ !CB.hasOperandBundles() && all_of(enumerate(CB.args()), [&](auto V) {
+ return V.value() != *U ||
+ (Callee->getArg(V.index())->hasNoCaptureAttr() &&
+ Callee->getArg(V.index())->onlyReadsMemory());
+ })) {
+ PI.setEscapedReadOnly(&CB);
+ return;
+ }
+
+ Base::visitCallBase(CB);
+ }
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
@@ -1397,7 +1418,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(nullptr) {
+ PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -1408,6 +1429,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
+ PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
@@ -1445,6 +1467,9 @@ void AllocaSlices::print(raw_ostream &OS) const {
return;
}
+ if (PointerEscapingInstrReadOnly)
+ OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
+
OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
@@ -5454,6 +5479,54 @@ void SROA::clobberUse(Use &U) {
}
}
+/// For each partition made only of full-width slices with a single store (plus
+/// loads / assume-like intrinsics), replace the uses of every same-typed load
+/// dominated by that store with the stored value. Always returns true.
+bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
+  for (auto &P : AS.partitions()) {
+    StoreInst *Store = nullptr;
+    // Only handle partitions whose slices all span the full partition width.
+    if (any_of(P, [&P](Slice &S) {
+          return S.beginOffset() != P.beginOffset() ||
+                 S.endOffset() != P.endOffset();
+        }))
+      continue;
+
+    // Check there is a single store and nothing else other than loads.
+    for (Slice &S : P) {
+      if (S.isDead())
+        continue;
+      auto *SliceUser = S.getUse()->getUser();
+      if (auto *St = dyn_cast<StoreInst>(SliceUser)) {
+        if (Store) {
+          Store = nullptr; // A second store: give up on this partition.
+          break;
+        }
+        Store = St;
+      } else if (!isa<LoadInst>(SliceUser) &&
+                 !isAssumeLikeIntrinsic(cast<Instruction>(SliceUser))) {
+        Store = nullptr;
+        break;
+      }
+    }
+    if (!Store)
+      continue;
+
+    // Replace loads by the value that was stored.
+    Value *Stored = Store->getValueOperand();
+    for (Slice &S : P) {
+      auto *Ld = dyn_cast<LoadInst>(S.getUse()->getUser());
+      if (!Ld || !DTU->getDomTree().dominates(Store, Ld) ||
+          Stored->getType() != Ld->getType())
+        continue;
+      LLVM_DEBUG(dbgs() << " Replacing " << *Ld << " with " << *Stored << "\n");
+      Ld->replaceAllUsesWith(Stored);
+    }
+  }
+
+  return true;
+}
+
/// Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
@@ -5494,6 +5567,11 @@ SROA::runOnAlloca(AllocaInst &AI) {
if (AS.isEscaped())
return {Changed, CFGChanged};
+ if (AS.isEscapedReadOnly()) {
+ Changed |= propagateStoredValuesToLoads(AI, AS);
+ return {Changed, CFGChanged};
+ }
+
// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index 87862b929a7511..65f819122a6951 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -485,7 +485,7 @@ define [2 x i32] @part_of_alloca_used_in_call(ptr %data, i64 %n) {
; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]])
; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
+; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
@@ -538,7 +538,7 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(ptr %data,
; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_FULL]])
; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
+; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
@@ -701,7 +701,7 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(ptr %data, i64
; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL]])
; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
+; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
@@ -757,7 +757,7 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(ptr %data
; CHECK-NEXT: [[I2:%.*]] = call i32 @capture_of_alloca(ptr [[SOME_ANOTHER_ALLOCA_FULL]])
; CHECK-NEXT: [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
; CHECK-NEXT: [[I3_FCA_0_LOAD:%.*]] = load i32, ptr [[I3_FCA_0_GEP]], align 4
-; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I3_FCA_0_LOAD]], 0
+; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
; CHECK-NEXT: [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
; CHECK-NEXT: [[I3_FCA_1_LOAD:%.*]] = load i32, ptr [[I3_FCA_1_GEP]], align 4
; CHECK-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1
@@ -817,7 +817,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[C]], align 4
-; CHECK-NEXT: ret i64 [[LD]]
+; CHECK-NEXT: ret i64 0
;
entry:
%c = alloca i64, align 2
@@ -867,7 +867,7 @@ define i8 @transform_load_and_store() {
; CHECK-NEXT: store i8 0, ptr [[A]], align 1
; CHECK-NEXT: call void @byte_user_of_alloca(ptr [[A]])
; CHECK-NEXT: [[R:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
entry:
%a = alloca i8
diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
index 0cc93bccc6e9d9..7679f3d0a4070a 100644
--- a/llvm/test/Transforms/SROA/readonlynocapture.ll
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -9,7 +9,7 @@ define i32 @simple() {
; CHECK-NEXT: store i32 0, ptr [[A]], align 4
; CHECK-NEXT: call void @callee(ptr [[A]])
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: ret i32 [[L1]]
+; CHECK-NEXT: ret i32 0
;
%a = alloca i32
store i32 0, ptr %a
@@ -27,7 +27,7 @@ define i32 @twoalloc() {
; CHECK-NEXT: call void @callee(ptr [[A]])
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT: ret i32 [[L2]]
+; CHECK-NEXT: ret i32 1
;
%a = alloca {i32, i32}
store i32 0, ptr %a
@@ -85,7 +85,7 @@ define i32 @twocalls() {
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: call void @callee(ptr [[A]])
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT: ret i32 [[L2]]
+; CHECK-NEXT: ret i32 1
;
%a = alloca {i32, i32}
store i32 0, ptr %a
@@ -107,7 +107,7 @@ define i32 @volatile() {
; CHECK-NEXT: call void @callee(ptr [[A]])
; CHECK-NEXT: [[L1:%.*]] = load volatile i32, ptr [[A]], align 4
; CHECK-NEXT: [[L2:%.*]] = load volatile i32, ptr [[B]], align 4
-; CHECK-NEXT: ret i32 [[L2]]
+; CHECK-NEXT: ret i32 1
;
%a = alloca {i32, i32}
store i32 0, ptr %a
@@ -172,7 +172,7 @@ define i32 @multiuse() {
; CHECK-NEXT: call void @callee_multiuse(ptr [[A]], ptr [[A]])
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT: ret i32 [[L2]]
+; CHECK-NEXT: ret i32 1
;
%a = alloca {i32, i32}
store i32 0, ptr %a
More information about the llvm-commits
mailing list