[llvm] [Inliner] Add support for preserving `nocapture` param attr (PR #113418)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 4 15:09:42 PST 2025


https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/113418

>From a08347ae01837e49cc54a1831bc2f544ad44cedb Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:15 -0500
Subject: [PATCH 1/2] [Inliner] Add tests for preserving `nocapture` when
 inlining; NFC

---
 llvm/test/Transforms/Inline/prop-nocapture.ll | 327 ++++++++++++++++++
 1 file changed, 327 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/prop-nocapture.ll

diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
new file mode 100644
index 000000000000000..0659fd7f77049f8
--- /dev/null
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -passes=inline -S < %s | FileCheck --check-prefixes=CHECK,NO_ASSUME %s
+; RUN: opt -passes=inline -S --enable-knowledge-retention < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME
+
+declare void @void.call.p0(ptr)
+declare void @void.call.p0.p1(ptr, ptr)
+declare i32 @ret.call.p0(ptr)
+declare ptr @retp.call.p0(ptr)
+
+define void @simple_nocapture_prop(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @simple_nocapture_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @simple_nocapture_prop(ptr %p)
+  ret void
+}
+
+define i32 @nocapture_with_return_prop(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[R_I]]
+;
+  %r = call i32 @nocapture_with_return_prop(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = xor i32 [[R]], -1
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call i32 @ret.call.p0(ptr %p)
+  %rr = xor i32 %r, -1
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_todo_indirect_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_todo_indirect_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = xor i32 [[R_I]], -1
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_todo_indirect(ptr %p)
+  ret i32 %r
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call ptr @retp.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR:%.*]] = load i32, ptr [[R]], align 4
+; CHECK-NEXT:    ret i32 [[RR]]
+;
+  %r = call ptr @retp.call.p0(ptr %p)
+  %rr = load i32, ptr %r
+  ret i32 %rr
+}
+
+define i32 @nocapture_with_return_prop_fail_maybe_captures_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_fail_maybe_captures_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R_I:%.*]] = call ptr @retp.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[RR_I:%.*]] = load i32, ptr [[R_I]], align 4
+; CHECK-NEXT:    ret i32 [[RR_I]]
+;
+  %r = call i32 @nocapture_with_return_prop_fail_maybe_captures(ptr %p)
+  ret i32 %r
+}
+
+define void @nocapture_prop_fail_preceding_alloca(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @nocapture_prop_fail_preceding_alloca(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  call void @nocapture_prop_fail_preceding_alloca(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2]])
+; CHECK-NEXT:    ret void
+;
+  %p2 = alloca i32
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0.p1(ptr %p, ptr %p2)
+  ret void
+}
+
+define void @nocapture_prop_fail_preceding_alloca2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_preceding_alloca2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0.p1(ptr [[P]], ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[P2_I]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_PRECEDING_ALLOCA2_EXIT]]
+; CHECK:       nocapture_prop_fail_preceding_alloca2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_preceding_alloca2(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca(ptr captures(none) %p, i1 %c) alwaysinline {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[P2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %T, label %F
+T:
+  %p2 = alloca i32
+  call void @void.call.p0(ptr %p2)
+  ret void
+F:
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_seperate_alloca_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[P2_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P2_I]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
+; CHECK:       nocapture_prop_okay_seperate_alloca.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_okay_seperate_alloca(ptr %p, i1 %c)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  call void @void.call.p0(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects_caller(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects_caller
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects(ptr %p)
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    ret void
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  call void @void.call.p0(ptr %p)
+  ret void
+F:
+  ret void
+}
+
+define void @nocapture_prop_fail_ensuing_side_effects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_fail_ensuing_side_effects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_FAIL_ENSUING_SIDE_EFFECTS2_EXIT]]
+; CHECK:       nocapture_prop_fail_ensuing_side_effects2.exit:
+; CHECK-NEXT:    ret void
+;
+  call void @nocapture_prop_fail_ensuing_side_effects2(ptr %p, i1 %c)
+  ret void
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects(ptr captures(none) %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects
+; CHECK-SAME: (ptr captures(none) [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret i32 [[R]]
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 0
+;
+  call void @void.call.p0(ptr %p)
+  br i1 %c, label %T, label %F
+T:
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+F:
+  ret i32 0
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]
+; CHECK:       nocapture_prop_okay_no_sideeffects.exit:
+; CHECK-NEXT:    [[R1:%.*]] = phi i32 [ [[R_I]], [[T_I]] ], [ 0, [[F_I]] ]
+; CHECK-NEXT:    ret i32 [[R1]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects(ptr %p, i1 %c)
+  ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2(ptr captures(none) %p) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2
+; CHECK-SAME: (ptr captures(none) [[P:%.*]]) {
+; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[R:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  call void @void.call.p0(ptr %p)
+  %r = call i32 @ret.call.p0(ptr %p) nounwind readonly willreturn
+  ret i32 %r
+}
+
+define i32 @nocapture_prop_okay_no_sideeffects2_caller(ptr %p, i1 %c) {
+; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects2_caller
+; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i32 @nocapture_prop_okay_no_sideeffects2(ptr [[P]], i1 [[C]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %r = call i32 @nocapture_prop_okay_no_sideeffects2(ptr %p, i1 %c)
+  ret i32 %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; NO_ASSUME: {{.*}}
+; USE_ASSUME: {{.*}}

>From afd608c64522289ba3f62e737c1b13ff587533c7 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 22 Oct 2024 16:32:17 -0500
Subject: [PATCH 2/2] [Inliner] Add support for preserving `nocapture` param
 attr

Currently if we have:
```
define @foo(ptr nocapture %p) {
entry:
    ...
    bar(ptr %p)
    ...
}
```

When inlining `foo`, we will lose the `nocapture` on `%p` which might
not be recoverable.

The goal of this patch is to preserve the `nocapture` if some
conservative analysis indicates we can.

1) Return value of `bar` is either unused or only used as return of
   `foo` (this rules out capture via return).

2) No `alloca` (or scratch memory of any sort) in `foo` s.t. there is a
   path from `entry` to `bar` that goes through an `alloca`. This helps
   rule out `bar` capturing `%p` in memory in a way that wouldn't be
   capturing outside of the scope of `foo`.

3) No paths in `foo` that go through `bar` have any instructions with
   side-effects other than `bar`. This rules out `bar` capturing `%p`
   in memory, but then some later instructions clearing the memory
   capture s.t `nocapture` in `foo` still holds. It also rules out
   some function (i.e `malloc`) creating scratch memory that `bar`
   could capture `%p` in but still only visible in the scope of `foo`.

Ultimately these three checks are highly conservative, but should
allow some preservation.
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 115 +++++++++++++++++-
 llvm/test/Transforms/Inline/prop-nocapture.ll |  10 +-
 2 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index b92d8b16daad2c6..d5750152502cd9b 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -68,6 +68,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ModRef.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1364,6 +1365,104 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
       ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
 }
 
+template <typename RangeT> static bool ContainsSideEffects(RangeT Range) {
+  // True if any instruction in Range may have side effects, i.e. may be
+  // an instruction that clears local scratch space CB stored into.
+  return any_of(Range, [](Instruction &I) { return I.mayHaveSideEffects(); });
+}
+
+template <typename RangeT> static bool ContainsScratchSpace(RangeT Range) {
+  return any_of(Range, [](Instruction &I) {
+    // An `alloca`, or any instruction that may have side effects, is
+    // treated as possibly creating local scratch space CB can store into.
+    return I.mayHaveSideEffects() || isa<AllocaInst>(&I);
+  });
+}
+
+template <typename NextFn, typename CheckFn>
+static bool CheckPathFromBBRecurse(DenseMap<BasicBlock *, bool> &CachedRes,
+                                   bool First, BasicBlock *BB, NextFn Next,
+                                   CheckFn Check) {
+  if (!First) {
+    // Optimistically seed the cache with true (okay to propagate `nocapture`)
+    // so that a block revisited through a loop returns instead of recursing.
+    auto [Iter, Inserted] = CachedRes.try_emplace(BB, true);
+    // Block was seen before (finished or still in progress): reuse its entry.
+    if (!Inserted)
+      return Iter->second;
+
+    if (!Check(BB->instructionsWithoutDebug())) {
+      Iter->second = false;
+      return false;
+    }
+  }
+  auto NextBBs = Next(BB);
+  // Every block `Next` yields (succs or preds, caller's choice) must pass too.
+  for (BasicBlock *NextBB : NextBBs) {
+    if (!CheckPathFromBBRecurse(CachedRes, /*First=*/false, NextBB, Next,
+                                Check)) {
+      CachedRes[BB] = false;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Assuming we have:
+// define @foo(ptr nocapture %p) {
+// entry:
+//   ...
+//   bar(ptr %p)
+//   ...
+// }
+//
+// Determine if we can propagate `nocapture` to the `%p` at the call to
+// `bar`. PureFromBB / NoLocalStateToBB memoize the two CFG walks below.
+static bool
+CanPropagateNoCaptureAtCB(DenseMap<BasicBlock *, bool> &PureFromBB,
+                          DenseMap<BasicBlock *, bool> &NoLocalStateToBB,
+                          BasicBlock *BB, CallBase *CB) {
+  // If CB returns a value that is used by anything other than `ret`, assume
+  // it may be capturing.
+  // Potential TODO: We could allow many operations.
+  if (!CB->getType()->isVoidTy())
+    for (User *U : CB->users())
+      if (!isa<ReturnInst>(U))
+        return false;
+
+  // Can't capture via return, so if no side-effects we are set.
+  if (!CB->mayHaveSideEffects())
+    return true;
+
+  auto It = CB->getIterator();
+  ++It;
+
+  // CB must be the only instruction with side-effects on every path from
+  // `entry` that goes through it, and no such path may create scratch
+  // space before CB. This accomplishes two things. 1) It ensures that
+  // after CB, there is no way a store/other could "clean up" any
+  // captures from CB. 2) There is no local state (i.e. `alloca` or a
+  // local `malloc`) that CB could have stored its params in.
+  if (ContainsSideEffects(make_range(It, BB->end())) ||
+      ContainsScratchSpace(make_range(BB->begin(), CB->getIterator())))
+    return false;
+
+  if (!CheckPathFromBBRecurse(
+          PureFromBB, /*First=*/true, BB,
+          [](BasicBlock *CheckedBB) { return successors(CheckedBB); },
+          [](const auto &Region) { return !ContainsSideEffects(Region); }))
+    return false;
+
+  if (!CheckPathFromBBRecurse(
+          NoLocalStateToBB, /*First=*/true, BB,
+          [](BasicBlock *CheckedBB) { return predecessors(CheckedBB); },
+          [](const auto &Region) { return !ContainsScratchSpace(Region); }))
+    return false;
+
+  return true;
+}
+
 // Add attributes from CB params and Fn attributes that can always be propagated
 // to the corresponding argument / inner callbases.
 static void AddParamAndFnBasicAttributes(const CallBase &CB,
@@ -1376,6 +1475,9 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
   SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs;
   bool HasAttrToPropagate = false;
 
+  DenseMap<BasicBlock *, bool> PureFromBB{};
+  DenseMap<BasicBlock *, bool> NoLocalStateToBB{};
+
   // Attributes we can only propagate if the exact parameter is forwarded.
   // We can propagate both poison generating and UB generating attributes
   // without any extra checks. The only attribute that is tricky to propagate
@@ -1394,6 +1496,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
       ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone);
     if (CB.paramHasAttr(I, Attribute::ReadOnly))
       ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly);
+    if (CB.doesNotCapture(I))
+      ValidObjParamAttrs.back().addCapturesAttr(CaptureInfo::none());
 
     for (Attribute::AttrKind AK : ExactAttrsToPropagate) {
       Attribute Attr = CB.getParamAttr(I, AK);
@@ -1478,9 +1582,16 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
           continue;
         }
 
-        // If so, propagate its access attributes.
-        AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]);
+        AttributeSet AS = AttributeSet::get(Context, ValidObjParamAttrs[ArgNo]);
+        // Check if we can propagate `captures(none)`.
+        if (capturesNothing(AS.getCaptureInfo()) &&
+            (NewInnerCB->doesNotCapture(I) ||
+             !CanPropagateNoCaptureAtCB(PureFromBB, NoLocalStateToBB, &BB,
+                                        cast<CallBase>(&Ins))))
+          AS = AS.removeAttribute(Context, Attribute::Captures);
 
+        // If so, propagate its access attributes.
+        AL = AL.addParamAttributes(Context, I, AttrBuilder{Context, AS});
         // We can have conflicting attributes from the inner callsite and
         // to-be-inlined callsite. In that case, choose the most
         // restrictive.
diff --git a/llvm/test/Transforms/Inline/prop-nocapture.ll b/llvm/test/Transforms/Inline/prop-nocapture.ll
index 0659fd7f77049f8..5517b489f64fda1 100644
--- a/llvm/test/Transforms/Inline/prop-nocapture.ll
+++ b/llvm/test/Transforms/Inline/prop-nocapture.ll
@@ -20,7 +20,7 @@ define void @simple_nocapture_prop(ptr captures(none) %p) {
 define void @simple_nocapture_prop_caller(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@simple_nocapture_prop_caller
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
 ; CHECK-NEXT:    ret void
 ;
   call void @simple_nocapture_prop(ptr %p)
@@ -40,7 +40,7 @@ define i32 @nocapture_with_return_prop(ptr captures(none) %p) {
 define i32 @nocapture_with_return_prop_caller(ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@nocapture_with_return_prop_caller
 ; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]])
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]])
 ; CHECK-NEXT:    ret i32 [[R_I]]
 ;
   %r = call i32 @nocapture_with_return_prop(ptr %p)
@@ -193,7 +193,7 @@ define void @nocapture_prop_okay_seperate_alloca_caller(ptr %p, i1 %c) {
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT:%.*]]
 ; CHECK:       F.i:
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[SAVEDSTACK]])
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_SEPERATE_ALLOCA_EXIT]]
 ; CHECK:       nocapture_prop_okay_seperate_alloca.exit:
@@ -286,10 +286,10 @@ F:
 define i32 @nocapture_prop_okay_no_sideeffects_caller(ptr %p, i1 %c) {
 ; CHECK-LABEL: define {{[^@]+}}@nocapture_prop_okay_no_sideeffects_caller
 ; CHECK-SAME: (ptr [[P:%.*]], i1 [[C:%.*]]) {
-; CHECK-NEXT:    call void @void.call.p0(ptr [[P]])
+; CHECK-NEXT:    call void @void.call.p0(ptr captures(none) [[P]])
 ; CHECK-NEXT:    br i1 [[C]], label [[T_I:%.*]], label [[F_I:%.*]]
 ; CHECK:       T.i:
-; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @ret.call.p0(ptr captures(none) [[P]]) #[[ATTR3]]
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT:%.*]]
 ; CHECK:       F.i:
 ; CHECK-NEXT:    br label [[NOCAPTURE_PROP_OKAY_NO_SIDEEFFECTS_EXIT]]



More information about the llvm-commits mailing list