[llvm] [Inliner] Add support for preserving `nocapture` param attr (PR #113418)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 21:46:04 PDT 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/113418
Currently if we have:
```
define @foo(ptr nocapture %p) {
entry:
...
bar(ptr %p)
...
}
```
When inlining `foo`, we will lose the `nocapture` on `%p` which might
not be recoverable.
The goal of this patch is to preserve the `nocapture` if some
conservative analysis indicates we can.
1) Return value of `bar` is either unused or only used as return of
`foo` (this rules of capture via return).
2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a
path from `entry` to `bar` that goes an `alloca`. This helps rule
out `bar` capturing `%p` in memory in a way that wouldn't be
capturing outside of the scope of `foo`.
3) No paths in `foo` that go through `bar` have any instructions with
side-effects other than `bar`. This rules out `bar` capturing `%p`
in memory, but then some later instructions clearing the memory
capture s.t `nocapture` in `foo` still holds. It also rules out
some function (i.e `malloc`) creating scratch memory that `bar`
could capture `%p` in but still only visible in the scope of `foo`.
Ultimately these three checks are highly conservative, but should
allow some preservation.
>From 3038f5588e4420355b5a25ef27907afad178b9d2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:15 -0500
Subject: [PATCH 1/2] [Inliner] Add tests for preserving `nocapture` when
inlining; NFC
---
llvm/test/Transforms/Inline/prop-nocapture.ll | 327 ++++++++++++++++++
1 file changed, 327 insertions(+)
create mode 100644 llvm/test/Transforms/Inline/prop-nocapture.ll
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
new file mode 100644
index 00000000000000..aec45ae3e1f6f7
--- /dev/null
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=inline -S < %s | FileCheck --check-prefixes=CHECK,NO_ASSUME %s
+; RUN: opt -passes=inline -S --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME
+
+declare void @void.call.p0(ptr)
+declare void @void.call.p0.p1(ptr, ptr)
+declare i32 @ret.call.p0(ptr)
+declare ptr @retp.call.p0(ptr)
+
+define void @simple_nocapture_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @simple_nocapture_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @simple_nocapture_prop(ptr %p)
+ ret void
+}
+
+define i32 @nocapture_with_return_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @ret.call.p0(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: ret i32 [[R_I]]
+;
+ %r = call i32 @nocapture_with_return_prop(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR:%.*]] = xor i32 [[R]], -1
+; CHECK-NEXT: ret i32 [[RR]]
+;
+ %r = call i32 @ret.call.p0(ptr %p)
+ %rr = xor i32 %r, -1
+ ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR_I:%.*]] = xor i32 [[R_I]], -1
+; CHECK-NEXT: ret i32 [[RR_I]]
+;
+ %r = call i32 @nocapture_with_return_prop_todo_indirect(ptr %p)
+ ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR:%.*]] = load i32, ptr [[R]], align 4
+; CHECK-NEXT: ret i32 [[RR]]
+;
+ %r = call ptr @void.call.p0(ptr %p)
+ %rr = load i32, ptr %r
+ ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[R_I:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[RR_I:%.*]] = load i32, ptr [[R_I]], align 4
+; CHECK-NEXT: ret i32 [[RR_I]]
+;
+ %r = call i32 @nocapture_with_return_prop_fail_maybe_captures(ptr %p)
+ ret i32 %r
+}
+
+define void @nocapture_prop_fail_preceding_alloca(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ call void @void.call.p0.p1(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @nocapture_prop_fail_preceding_alloca(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ call void @nocapture_prop_fail_preceding_alloca(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT: ret void
+;
+ %p2 = alloca i32
+ br i1 %c, label %T, label %F
+T:
+ call void @void.call.p0(ptr %p2)
+ ret void
+F:
+ call void @void.call.p0.p1(ptr %p, ptr %p2)
+ ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT]]
+; CHECK: nocapture_prop_fail_preceding_alloca2.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_preceding_alloca2(ptr %p, i1 %c)
+ ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca(ptr nocapture %p, i1 %c) alwaysinline {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ br i1 %c, label %T, label %F
+T:
+ %p2 = alloca i32
+ call void @void.call.p0(ptr %p2)
+ ret void
+F:
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
+; CHECK: nocapture_prop_okay_seperate_alloca.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_okay_seperate_alloca(ptr %p, i1 %c)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ call void @void.call.p0(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_ensuing_side_effects(ptr %p)
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: ret void
+; CHECK: F:
+; CHECK-NEXT: ret void
+;
+ call void @void.call.p0(ptr %p)
+ br i1 %c, label %T, label %F
+T:
+ call void @void.call.p0(ptr %p)
+ ret void
+F:
+ ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT]]
+; CHECK: nocapture_prop_fail_ensuing_side_effects2.exit:
+; CHECK-NEXT: ret void
+;
+ call void @nocapture_prop_fail_ensuing_side_effects2(ptr %p, i1 %c)
+ ret void
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: ret i32 [[R]]
+; CHECK: F:
+; CHECK-NEXT: ret i32 0
+;
+ call void @void.call.p0(ptr %p)
+ br i1 %c, label %T, label %F
+T:
+ %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+ ret i32 %r
+F:
+ ret i32 0
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
+; CHECK: nocapture_prop_okay_no_sideeffects.exit:
+; CHECK-NEXT: [[R1:%.*]] = phi i32 [ [[R_I]], [[T_I]] ], [ 0, [[F_I]] ]
+; CHECK-NEXT: ret i32 [[R1]]
+;
+ %r = call i32 @nocapture_prop_okay_no_sideeffects(ptr %p, i1 %c)
+ ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ call void @void.call.p0(ptr %p)
+ %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+ ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = call i32 @nocapture_prop_okay_no_sideeffects2(ptr [[P]], i1 [[C]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @nocapture_prop_okay_no_sideeffects2(ptr %p, i1 %c)
+ ret i32 %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_ASSUME: {{.*}}
+; USE_ASSUME: {{.*}}
>From c349e96247395288ae00b69d56e1c6eb90abe8a7 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:17 -0500
Subject: [PATCH 2/2] [Inliner] Add support for preserving `nocapture` param
attr
Currently if we have:
```
define @foo(ptr nocapture %p) {
entry:
...
bar(ptr %p)
...
}
```
When inlining `foo`, we will lose the `nocapture` on `%p` which might
not be recoverable.
The goal of this patch is to preserve the `nocapture` if some
conservative analysis indicates we can.
1) Return value of `bar` is either unused or only used as return of
`foo` (this rules of capture via return).
2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a
path from `entry` to `bar` that goes an `alloca`. This helps rule
out `bar` capturing `%p` in memory in a way that wouldn't be
capturing outside of the scope of `foo`.
3) No paths in `foo` that go through `bar` have any instructions with
side-effects other than `bar`. This rules out `bar` capturing `%p`
in memory, but then some later instructions clearing the memory
capture s.t `nocapture` in `foo` still holds. It also rules out
some function (i.e `malloc`) creating scratch memory that `bar`
could capture `%p` in but still only visible in the scope of `foo`.
Ultimately these three checks are highly conservative, but should
allow some preservation.
---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 113 +++++++++++++++++-
llvm/test/Transforms/Inline/prop-nocapture.ll | 10 +-
2 files changed, 117 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 4ad426285ce2f0..a6ff3c085b8fbb 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1351,6 +1351,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}
+template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
+ // Any instruction that may clear local scratch space CB stored
+ // into.
+ return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
+}
+
+template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
+ return any_of(Range, [](Instruction &I) {
+ // Any instruction that may create local scratch space CB can store
+ // into.
+ return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
+ });
+}
+
+template <typename NextFn, typename CheckFn>
+static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
+ bool First, BasicBlock *BB, NextFn Next,
+ CheckFn Check) {
+ if (!First) {
+ // Initialize to true (okay to propagate) `nocapture`. This means that loops
+ // will be okay.
+ auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
+ // If we already have a result, return it.
+ if (!Inserted)
+ return Iter->second;
+
+ if (!Check(BB->instructionsWithoutDebug())) {
+ Iter->second = false;
+ return false;
+ }
+ }
+ auto NextBBs = Next(BB);
+ // Check all Succs/Preds
+ for (BasicBlock *NextBB : NextBBs) {
+ if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
+ Check)) {
+ CachedRes[BB] = false;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Assuming we have:
+// define @foo(ptr nocapture %p) {
+// entry:
+// ...
+// bar (ptr %p)
+// ...
+// }
+//
+// Determine if we can propagate `nocapture` to the `%p` at the
+// `bar`.
+static bool
+CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
+ DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
+ BasicBlock *BB, CallBase *CB) {
+ // If CB returns and its used by anything other than `ret`, assume it may be
+ // capturing.
+ // Potential TODO: We could allow many operations.
+ if (!CB->getType()->isVoidTy())
+ for (auto Use : CB->users())
+ if (!isa<ReturnInst>(Use))
+ return false;
+
+ // Can't capture via return, so if no side-effects we are set.
+ if (!CB->mayHaveSideEffects())
+ return true;
+
+ auto It = CB->getIterator();
+ ++It;
+
+ // Check that CB instruction with side-effects on all paths from
+ // `entry` that go through the CB and there are no `alloca`
+ // instructions. This accomplishes two things. 1) It ensures that
+ // after CB, there is no way a store/other could "clean up" any
+ // captures from CB. 2) There is no local state (i.e `alloca` or a
+ // local `malloc`) that could CB could have stored in params in.
+ if (ContainsSideEffects(make_range(It, BB->end())) ||
+ ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
+ return false;
+
+ if (!CheckPathFromBBRecurse(
+ PureFromBB, /*First=*/true, BB,
+ [](BasicBlock *CheckedBB) { return successors(CheckedBB); },
+ [](const auto &Region) { return !ContainsSideEffects(Region); }))
+ return false;
+
+ if (!CheckPathFromBBRecurse(
+ PureFromBB, /*First=*/true, BB,
+ [](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
+ [](const auto &Region) { return !ContainsScratchSpace(Region); }))
+ return false;
+
+ return true;
+}
+
// Add attributes from CB params and Fn attributes that can always be propagated
// to the corresponding argument / inner callbases.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1363,6 +1461,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
bool HasAttrToPropagate = false;
+ DenseMap<BasicBlock *, bool> PureFromBB{};
+ DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
+
// Attributes we can only propagate if the exact parameter is forwarded.
// We can propagate both poison generating and UB generating attributes
// without any extra checks. The only attribute that is tricky to propagate
@@ -1381,6 +1482,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
if (CB.paramHasAttr(I, Attribute::ReadOnly))
ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
+ if (CB.paramHasAttr(I, Attribute::NoCapture))
+ ValidObjParamAttrs.back().addAttribute(Attribute::NoCapture);
for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
Attribute Attr = CB.getParamAttr(I, AK);
@@ -1463,8 +1566,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
ArgNo = Arg->getArgNo();
}
+ AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
+ // Check if we can propagate `nocapture`.
+ if (AS.hasAttribute(Attribute::NoCapture) &&
+ (NewInnerCB->paramHasAttr(I, Attribute::NoCapture) ||
+ !CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
+ cast<CallBase>(&Ins))))
+ AS = AS.removeAttribute(Context, Attribute::NoCapture);
+
// If so, propagate its access attributes.
- AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
+ AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
// We can have conflicting attributes from the inner callsite and
// to-be-inlined callsite. In that case, choose the most
// restrictive.
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
index aec45ae3e1f6f7..a9db946db4f28d 100644
--- a/llvm/test/Transforms/Inline/prop-nocapture.ll
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr nocapture %p) {
define void @simple_nocapture_prop_caller(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
; CHECK-NEXT: ret void
;
call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr nocapture %p) {
define i32 @nocapture_with_return_prop_caller(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]])
; CHECK-NEXT: ret i32 [[R_I]]
;
%r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
; CHECK: F.i:
-; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
; CHECK: nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
-; CHECK-NEXT: call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT: call void @void.call.p0(ptr nocapture [[P]])
; CHECK-NEXT: br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
; CHECK: T.i:
-; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT: [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]]) #[[ATTR3]]
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
; CHECK: F.i:
; CHECK-NEXT: br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
More information about the llvm-commits
mailing list