[llvm] [Inliner] Add support for preserving `nocapture` param attr (PR #113418)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 21:46:04 PDT 2024


https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/113418

Currently if we have:
```
define @foo(ptr nocapture %p) {
entry:
    ...
    bar(ptr %p)
    ...
}
```

When inlining `foo`, we will lose the `nocapture` on `%p` which might
not be recoverable.

The goal of this patch is to preserve the `nocapture` if some
conservative analysis indicates we can.

1) Return value of `bar` is either unused or only used as return of
   `foo` (this rules out capture via return).

2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a
   path from `entry` to `bar` that goes through an `alloca`. This helps
   rule out `bar` capturing `%p` in memory in a way that wouldn't be
   capturing outside of the scope of `foo`.

3) No paths in `foo` that go through `bar` have any instructions with
   side-effects other than `bar`. This rules out `bar` capturing `%p`
   in memory, but then some later instructions clearing the memory
   capture s.t `nocapture` in `foo` still holds. It also rules out
   some function (e.g. `malloc`) creating scratch memory that `bar`
   could capture `%p` in but still only visible in the scope of `foo`.

Ultimately these three checks are highly conservative, but should
allow some preservation.


>From 3038f5588e4420355b5a25ef27907afad178b9d2 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:15 -0500
Subject: [PATCH 1/2] [Inliner] Add tests for preserving `nocapture` when
 inlining; NFC

---
 llvm/test/Transforms/Inline/prop-nocapture.ll | 327 ++++++++++++++++++
 1 file changed, 327 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/prop-nocapture.ll

diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
new file mode 100644
index 00000000000000..aec45ae3e1f6f7
--- /dev/null
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=inline -S < %s | FileCheck --check-prefixes=CHECK,NO_ASSUME %s
+; RUN: opt -passes=inline -S --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME
+
+declare void @void.call.p0(ptr)
+declare void @void.call.p0.p1(ptr, ptr)
+declare i32 @ret.call.p0(ptr)
+declare ptr @retp.call.p0(ptr)
+
+define void @simple_nocapture_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @simple_nocapture_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @simple_nocapture_prop(ptr %p)
+  ret void
+}
+
+define i32 @nocapture_with_return_prop(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[R_I]]
+;
+  %r = call i32 @nocapture_with_return_prop(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = xor i32 [[R]], -1
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  %rr = xor i32 %r, -1
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = xor i32 [[R_I]], -1
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_todo_indirect(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = load i32, ptr [[R]], align 4
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call ptr @void.call.p0(ptr %p)
+  %rr = load i32, ptr %r
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call ptr @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = load i32, ptr [[R_I]], align 4
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_fail_maybe_captures(ptr %p)
+  ret i32 %r
+}
+
+define void @nocapture_prop_fail_preceding_alloca(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @nocapture_prop_fail_preceding_alloca(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  call void @nocapture_prop_fail_preceding_alloca(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT]]
+; CHECK:       nocapture_prop_fail_preceding_alloca2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_preceding_alloca2(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca(ptr nocapture %p, i1 %c) alwaysinline {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %T, label %F
+T:
+  %p2 = alloca i32
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
+; CHECK:       nocapture_prop_okay_seperate_alloca.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_okay_seperate_alloca(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p)
+  ret void
+F:
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT]]
+; CHECK:       nocapture_prop_fail_ensuing_side_effects2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects2(ptr %p, i1 %c)
+  ret void
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects(ptr nocapture %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects
+; CHECK-SAME: (ptr nocapture [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret i32 [[R]]
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 0
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+F:
+  ret i32 0
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
+; CHECK:       nocapture_prop_okay_no_sideeffects.exit:
+; CHECK-NEXT:    [[R1:%.*]] = phi i32 [ [[R_I]], [[T_I]] ], [ 0, [[F_I]] ]
+; CHECK-NEXT:    ret i32 [[R1]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects(ptr %p, i1 %c)
+  ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2(ptr nocapture %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2
+; CHECK-SAME: (ptr nocapture [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  call void @void.call.p0(ptr %p)
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @nocapture_prop_okay_no_sideeffects2(ptr [[P]], i1 [[C]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects2(ptr %p, i1 %c)
+  ret i32 %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_ASSUME: {{.*}}
+; USE_ASSUME: {{.*}}

>From c349e96247395288ae00b69d56e1c6eb90abe8a7 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:17 -0500
Subject: [PATCH 2/2] [Inliner] Add support for preserving `nocapture` param
 attr

Currently if we have:
```
define @foo(ptr nocapture %p) {
entry:
    ...
    bar(ptr %p)
    ...
}
```

When inlining `foo`, we will lose the `nocapture` on `%p` which might
not be recoverable.

The goal of this patch is to preserve the `nocapture` if some
conservative analysis indicates we can.

1) Return value of `bar` is either unused or only used as return of
   `foo` (this rules out capture via return).

2) No `alloca` (or scratch memory of any sort) in `foo` s.t there is a
   path from `entry` to `bar` that goes through an `alloca`. This helps
   rule out `bar` capturing `%p` in memory in a way that wouldn't be
   capturing outside of the scope of `foo`.

3) No paths in `foo` that go through `bar` have any instructions with
   side-effects other than `bar`. This rules out `bar` capturing `%p`
   in memory, but then some later instructions clearing the memory
   capture s.t `nocapture` in `foo` still holds. It also rules out
   some function (e.g. `malloc`) creating scratch memory that `bar`
   could capture `%p` in but still only visible in the scope of `foo`.

Ultimately these three checks are highly conservative, but should
allow some preservation.
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 113 +++++++++++++++++-
 llvm/test/Transforms/Inline/prop-nocapture.ll |  10 +-
 2 files changed, 117 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 4ad426285ce2f0..a6ff3c085b8fbb 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1351,6 +1351,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
       ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
 }
 
+template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
+  // True if any instruction in Range may have side-effects; such an
+  // instruction could clear local scratch space CB stored into.
+  return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
+}
+
+template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
+  return any_of(Range, [](Instruction &I) {
+    // An `alloca`, or any instruction that may have side-effects, is treated
+    // as possibly creating local scratch space CB can store into.
+    return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
+  });
+}
+
+template <typename NextFn, typename CheckFn>
+static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
+                                   bool First, BasicBlock *BB, NextFn Next,
+                                   CheckFn Check) {
+  if (!First) {
+    // Optimistically record `true` for BB before visiting it, so that an edge
+    // back to a block still being walked returns at once (loops terminate).
+    auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
+    // BB was already visited (or is in progress): reuse the cached answer.
+    if (!Inserted)
+      return Iter->second;
+
+    if (!Check(BB->instructionsWithoutDebug())) {
+      Iter->second = false;
+      return false;
+    }
+  }
+  auto NextBBs = Next(BB);
+  // Every block yielded by Next (successors or predecessors) must pass too.
+  for (BasicBlock *NextBB : NextBBs) {
+    if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
+                                Check)) {
+      CachedRes[BB] = false;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Assuming we have:
+// define @foo(ptr nocapture %p) {
+// entry:
+//     ...
+//     bar(ptr %p)
+//     ...
+// }
+// determine if we can propagate `nocapture` to the `%p` at `bar` (CB, in BB).
+// PureFromBB caches the successor (side-effect) walk and NoLocalStateToBB the
+// predecessor (scratch-space) walk; they must stay separate maps.
+static bool
+CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
+                          DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
+                          BasicBlock *BB, CallBase *CB) {
+  // If CB returns a value that is used by anything other than `ret`, assume
+  // it may be capturing.
+  // Potential TODO: We could allow many operations.
+  if (!CB->getType()->isVoidTy())
+    for (const User *U : CB->users())
+      if (!isa<ReturnInst>(U))
+        return false;
+
+  // Can't capture via return, so if no side-effects we are set.
+  if (!CB->mayHaveSideEffects())
+    return true;
+
+  auto It = CB->getIterator();
+  ++It;
+
+  // Check that CB is the only instruction with side-effects on all paths
+  // from `entry` that go through CB, and that no `alloca` precedes it.
+  // This accomplishes two things. 1) It ensures that after CB, there is
+  // no way a store/other could "clean up" any captures from CB. 2) There
+  // is no local state (e.g. `alloca` or a local `malloc`) that CB could
+  // have stored its params in.
+  if (ContainsSideEffects(make_range(It, BB->end())) ||
+      ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
+    return false;
+
+  if (!CheckPathFromBBRecurse(
+          PureFromBB, /*First=*/true, BB,
+          [](BasicBlock *CheckedBB) { return successors(CheckedBB); },
+          [](const auto &Region) { return !ContainsSideEffects(Region); }))
+    return false;
+
+  if (!CheckPathFromBBRecurse(
+          NoLocalStateToBB, /*First=*/true, BB,
+          [](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
+          [](const auto &Region) { return !ContainsScratchSpace(Region); }))
+    return false;
+
+  return true;
+}
+
 // Add attributes from CB params and Fn attributes that can always be propagated
 // to the corresponding argument / inner callbases.
 static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1363,6 +1461,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
   SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
   bool HasAttrToPropagate = false;
 
+  DenseMap<BasicBlock *, bool> PureFromBB{};
+  DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
+
   // Attributes we can only propagate if the exact parameter is forwarded.
   // We can propagate both poison generating and UB generating attributes
   // without any extra checks. The only attribute that is tricky to propagate
@@ -1381,6 +1482,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
       ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
     if (CB.paramHasAttr(I, Attribute::ReadOnly))
       ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
+    if (CB.paramHasAttr(I, Attribute::NoCapture))
+      ValidObjParamAttrs.back().addAttribute(Attribute::NoCapture);
 
     for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
       Attribute Attr = CB.getParamAttr(I, AK);
@@ -1463,8 +1566,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
           ArgNo = Arg->getArgNo();
         }
 
+        AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
+        // Check if we can propagate `nocapture`.
+        if (AS.hasAttribute(Attribute::NoCapture) &&
+            (NewInnerCB->paramHasAttr(I, Attribute::NoCapture) ||
+             !CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
+                                        cast<CallBase>(&Ins))))
+          AS = AS.removeAttribute(Context, Attribute::NoCapture);
+
         // If so, propagate its access attributes.
-        AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
+        AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
         // We can have conflicting attributes from the inner callsite and
         // to-be-inlined callsite. In that case, choose the most
         // restrictive.
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
index aec45ae3e1f6f7..a9db946db4f28d 100644
--- a/llvm/test/Transforms/Inline/prop-nocapture.ll
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr nocapture %p) {
 define void @simple_nocapture_prop_caller(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr nocapture [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr nocapture %p) {
 define i32 @nocapture_with_return_prop_caller(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]])
 ; CHECK-NEXT:    ret i32 [[R_I]]
 ;
   %r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
 ; CHECK:       F.i:
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr nocapture [[P]])
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
 ; CHECK:       nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
 define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
 ; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
 ; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr nocapture [[P]])
 ; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
 ; CHECK:       T.i:
-; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr nocapture [[P]]) #[[ATTR3]]
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
 ; CHECK:       F.i:
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]



More information about the llvm-commits mailing list