[clang] Elide suspension points via [[clang::coro_destroyed_on_suspend]] (PR #152029)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 4 13:34:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: None (snarkmaster)
<details>
<summary>Changes</summary>
See `AttrDocs.td` for the user-facing docs. The immediate motivation was that I noticed that short-circuiting coroutines failed to optimize well. My demo program for lack-of-optimization is here: https://godbolt.org/z/oMdq8zfov
With this patch, `simple_coro()` in the demo does avoid the allocation, and compiles to a function-like form.
```cpp
#include <coroutine>
#include <optional>
// `optional_wrapper` exists since `get_return_object()` can't return
// `std::optional` directly. C++ coroutines have a fundamental timing mismatch
// between when the return object is created and when the value is available:
//
// 1) Early (coroutine startup): `get_return_object()` is called and must return
// something immediately.
// 2) Later (when `co_return` executes): `return_value(T)` is called with the
// actual value.
// 3) Issue: If `get_return_object()` returns the storage, it's empty when
// returned, and writing to it later cannot affect the already-returned copy.
template <typename T>
struct optional_wrapper {
std::optional<T> storage_;
std::optional<T>*& pointer_;
optional_wrapper(std::optional<T>*& p) : pointer_(p) {
pointer_ = &storage_;
}
operator std::optional<T>() {
return std::move(*storage_);
}
~optional_wrapper() {}
};
// Make `std::optional` a coroutine
namespace std {
template <typename T, typename... Args>
struct coroutine_traits<optional<T>, Args...> {
struct promise_type {
optional<T>* storagePtr_ = nullptr;
promise_type() = default;
::optional_wrapper<T> get_return_object() {
return ::optional_wrapper<T>(storagePtr_);
}
suspend_never initial_suspend() const noexcept {
return {};
}
suspend_never final_suspend() const noexcept {
return {};
}
void return_value(T value) {
*storagePtr_ = std::move(value);
}
void unhandled_exception() {
// Leave storage_ empty to represent error
}
};
};
} // namespace std
// Non-coroutine baseline -- compare to the `simple_coro` variants below
std::optional<int> simple_func(const std::optional<int>& r) {
if (r.has_value()) {
return r.value() + 1;
}
return std::nullopt; // empty optional (error)
}
// Without `co_await`, this optimizes just like `simple_func`.
// Unfortunately, this doesn't short-circuit when `r` is empty.
std::optional<int> wrong_simple_coro(const std::optional<int>& r) {
co_return r.value() + 2;
}
// `#if 0` adds a short-circuiting `co_await` that may suspend to
// immediately exit the parent coroutine. Unfortunately, this
// triggers a coro frame allocation, and pessimizes `simple_coro`.
template <typename T>
struct optional_awaitable {
std::optional<T> opt_;
bool await_ready() const noexcept {
return opt_.has_value();
}
T await_resume() {
return opt_.value();
}
template <typename Promise>
[[clang::coro_destroyed_on_suspend]] void await_suspend(
std::coroutine_handle<Promise> handle) {
handle.destroy();
}
};
template <typename T>
optional_awaitable<T> operator co_await(std::optional<T> opt) {
return {std::move(opt)};
}
// Matches the logic of `simple_func`. Before this patch, adding a
// "suspend-to-exit" await point forces an allocation.
std::optional<int> simple_coro(const std::optional<int>& r) {
co_return co_await std::move(r) + 4;
}
int main() {
return simple_coro(std::optional<int>{8}).value() +
wrong_simple_coro(std::optional<int>{16}).value() +
simple_func(std::optional<int>{32}).value();
}
```
---
Full diff: https://github.com/llvm/llvm-project/pull/152029.diff
7 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+4)
- (modified) clang/include/clang/Basic/Attr.td (+8)
- (modified) clang/include/clang/Basic/AttrDocs.td (+60)
- (modified) clang/lib/CodeGen/CGCoroutine.cpp (+47-16)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+2-1)
- (added) clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp (+152)
- (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9231f2cae9bfa..3ebf64bfbbe11 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -116,6 +116,10 @@ Removed Compiler Flags
Attribute Changes in Clang
--------------------------
+- Introduced a new attribute ``[[clang::coro_destroyed_on_suspend]]`` on coroutine ``await_suspend``
+ methods to indicate that the coroutine will be destroyed during suspension, enabling optimizations
+ that skip the suspend point.
+
Improvements to Clang's diagnostics
-----------------------------------
- Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 63fa6aa508827..f0e12ed3b9be3 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
let SimpleHandler = 1;
}
+def CoroDestroyedOnSuspend: InheritableAttr {
+ let Spellings = [Clang<"coro_destroyed_on_suspend">];
+ let Subjects = SubjectList<[CXXMethod]>;
+ let LangOpts = [CPlusPlus];
+ let Documentation = [CoroDestroyedOnSuspendDoc];
+ let SimpleHandler = 1;
+}
+
// OSObject-based attributes.
def OSConsumed : InheritableParamAttr {
let Spellings = [Clang<"os_consumed">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 326cf0bc842db..7745e08de44cc 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9244,6 +9244,66 @@ Example:
}];
}
+def CoroDestroyedOnSuspendDoc : Documentation {
+ let Category = DocCatDecl;
+ let Content = [{
+The ``[[clang::coro_destroyed_on_suspend]]`` attribute may be applied to a C++
+``void await_suspend(handle)`` method belonging to a coroutine awaiter object.
+
+**Warning:** The attribute must only be used when ``await_suspend`` is
+guaranteed to call ``handle.destroy()`` before completing. Violating this
+invariant **will** cause undefined behavior. Be sure to consider exceptions.
+
+This attribute improves the optimization of "short-circuiting" coroutines. It
+is useful when every suspend point in the coroutine is either (i) trivial
+(`std::suspend_never`), or (ii) a `co_await` that can be translated into simple
+control flow as:
+
+.. code-block:: c++
+
+ if (awaiter.await_ready()) {
+ val = awaiter.await_resume();
+ } else {
+ awaiter.await_suspend();
+ return /* value representing the "execution short-circuited" outcome */
+ }
+
+Adding the attribute permits the compiler to elide the `@llvm.coro.suspend`
+intrinsic and go directly to the cleanup/destruction path, avoiding unnecessary
+suspend/resume overhead. If all suspension points can be simplified via (i) or
+(ii), then the coroutine should be able to be optimized nearly like a plain
+function via the `isNoSuspendCoroutine` optimization. The typical wins are:
+ - no heap allocation
+ - smaller code size & better optimization
+
+For example,
+
+.. code-block:: c++
+
+ template <typename T>
+ struct optional_awaitable {
+ std::optional<T> opt_;
+
+ bool await_ready() const noexcept {
+ return opt_.has_value();
+ }
+
+ T await_resume() {
+ return std::move(opt_).value();
+ }
+
+ [[clang::coro_destroyed_on_suspend]]
+ template <typename Promise>
+ void await_suspend(std::coroutine_handle<Promise> handle) {
+ // DANGER: If any exit path from a `coro_destroyed_on_suspend`-marked
+ // `await_suspend` neglects to call `destroy()`, that will cause UB.
+ handle.destroy();
+ }
+ };
+
+}];
+}
+
def CountedByDocs : Documentation {
let Category = DocCatField;
let Content = [{
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 827385f9c1a1f..e72c0e28a442e 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -247,8 +247,9 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
+ bool SuspendWillDestroyCoro = false;
auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
- CGF.CurFn->getName(), Prefix, S);
+ CGF.CurFn->getName(), Prefix, S, SuspendWillDestroyCoro);
CGF.CurCoro.InSuspendBlock = true;
@@ -317,17 +318,24 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
}
}
- // Emit the suspend point.
- const bool IsFinalSuspend = (Kind == AwaitKind::Final);
- llvm::Function *CoroSuspend =
- CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
- auto *SuspendResult = Builder.CreateCall(
- CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
-
- // Create a switch capturing three possible continuations.
- auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
- Switch->addCase(Builder.getInt8(0), ReadyBlock);
- Switch->addCase(Builder.getInt8(1), CleanupBlock);
+ // If suspend will destroy coroutine, skip the suspend intrinsic and go
+ // straight to cleanup
+ if (SuspendWillDestroyCoro) {
+ // Skip the suspend point and go directly to cleanup
+ CGF.EmitBranch(CleanupBlock);
+ } else {
+ // Emit the suspend point.
+ const bool IsFinalSuspend = (Kind == AwaitKind::Final);
+ llvm::Function *CoroSuspend =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
+ auto *SuspendResult = Builder.CreateCall(
+ CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
+
+ // Create a switch capturing three possible continuations.
+ auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
+ Switch->addCase(Builder.getInt8(0), ReadyBlock);
+ Switch->addCase(Builder.getInt8(1), CleanupBlock);
+ }
// Emit cleanup for this suspend point.
CGF.EmitBlock(CleanupBlock);
@@ -411,10 +419,9 @@ static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx,
}
#endif
-llvm::Function *
-CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
- Twine const &SuspendPointName,
- CoroutineSuspendExpr const &S) {
+llvm::Function *CodeGenFunction::generateAwaitSuspendWrapper(
+ Twine const &CoroName, Twine const &SuspendPointName,
+ CoroutineSuspendExpr const &S, bool &SuspendWillDestroyCoro) {
std::string FuncName =
(CoroName + ".__await_suspend_wrapper__" + SuspendPointName).str();
@@ -445,6 +452,30 @@ CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
Fn->setMustProgress();
Fn->addFnAttr(llvm::Attribute::AttrKind::AlwaysInline);
+ // Is the underlying `await_suspend()` marked `coro_destroyed_on_suspend`?
+ SuspendWillDestroyCoro = false;
+ // Only check for the attribute if the suspend expression returns void. When
+ // a handle is returned, the outermost method call is `.address()`, and
+ // there's no point in doing more work to obtain the inner `await_suspend`.
+ if (S.getSuspendReturnType() == CoroutineSuspendExpr::SuspendReturnType::SuspendVoid) {
+ if (auto *CE = dyn_cast<CallExpr>(S.getSuspendExpr()->IgnoreImplicit())) {
+ if (auto *ME = dyn_cast<MemberExpr>(CE->getCallee())) {
+ if (auto *MethodDecl = dyn_cast<CXXMethodDecl>(ME->getMemberDecl())) {
+ if (MethodDecl->getNameAsString() == "await_suspend") {
+ for (const auto *Attr : MethodDecl->attrs()) {
+ if (Attr->getKind() == attr::Kind::CoroDestroyedOnSuspend) {
+ SuspendWillDestroyCoro = true;
+ break;
+ }
+ }
+ } else {
+ assert(false && "Unexpected method for [[clang::coro_destroyed_on_suspend]]");
+ }
+ }
+ }
+ }
+ }
+
StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args);
// FIXME: add TBAA metadata to the loads
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6c32c98cec011..ecb5d8e2fcf8c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -383,7 +383,8 @@ class CodeGenFunction : public CodeGenTypeCache {
// where type is one of (void, i1, ptr)
llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName,
Twine const &SuspendPointName,
- CoroutineSuspendExpr const &S);
+ CoroutineSuspendExpr const &S,
+ bool &SuspendWillDestroyCoro);
/// CurGD - The GlobalDecl for the current function being compiled.
GlobalDecl CurGD;
diff --git a/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp
new file mode 100644
index 0000000000000..99505c317e24e
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp
@@ -0,0 +1,152 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN: -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-INITIAL
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN: -O2 | FileCheck %s --check-prefix=CHECK-OPTIMIZED
+
+#include "Inputs/coroutine.h"
+
+// Awaitable with `coro_destroyed_on_suspend` attribute
+struct DestroyingAwaitable {
+ bool await_ready() { return false; }
+ [[clang::coro_destroyed_on_suspend]]
+ void await_suspend(std::coroutine_handle<> h) {
+ h.destroy();
+ }
+ void await_resume() {}
+};
+
+// Awaitable without `coro_destroyed_on_suspend` (normal behavior)
+struct NormalAwaitable {
+ bool await_ready() { return false; }
+ void await_suspend(std::coroutine_handle<> h) {}
+ void await_resume() {}
+};
+
+// Coroutine type with `std::suspend_never` for initial/final suspend
+struct Task {
+ struct promise_type {
+ Task get_return_object() { return {}; }
+ std::suspend_never initial_suspend() { return {}; }
+ std::suspend_never final_suspend() noexcept { return {}; }
+ void return_void() {}
+ void unhandled_exception() {}
+ };
+};
+
+// Single co_await with coro_destroyed_on_suspend.
+// Should result in no allocation after optimization.
+Task test_single_destroying_await() {
+ co_await DestroyingAwaitable{};
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Test multiple `co_await`s, all with `coro_destroyed_on_suspend`.
+// This should also result in no allocation after optimization.
+Task test_multiple_destroying_awaits(bool condition) {
+ co_await DestroyingAwaitable{};
+ co_await DestroyingAwaitable{};
+ if (condition) {
+ co_await DestroyingAwaitable{};
+ }
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Mixed awaits - some with `coro_destroyed_on_suspend`, some without.
+// We should still see allocation because not all awaits destroy the coroutine.
+Task test_mixed_awaits() {
+ co_await NormalAwaitable{}; // Must precede "destroy" to be reachable
+ co_await DestroyingAwaitable{};
+}
+
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
+// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
+// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
+// CHECK-OPTIMIZED: call{{.*}} @_Znwm
+
+
+// Check the attribute detection affects control flow. With
+// `coro_destroyed_on_suspend`, the suspend point should be skipped and control
+// should go directly to cleanup instead of the normal suspend logic
+Task test_attribute_detection() {
+ co_await DestroyingAwaitable{};
+ // Unreachable in OPTIMIZED, so those builds don't see an allocation.
+ co_await NormalAwaitable{};
+}
+
+// Check that we skip the normal suspend intrinsic and go directly to cleanup.
+// This is the key behavioral difference the attribute enables.
+//
+// CHECK-INITIAL-LABEL: define{{.*}} void @_Z24test_attribute_detectionv
+// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await
+// CHECK-NEXT: br label %await.cleanup
+// CHECK-INITIAL-NOT: call{{.*}} @llvm.coro.suspend
+// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await
+// CHECK-INITIAL: call{{.*}} @llvm.coro.suspend
+// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__final
+
+// Since `co_await DestroyingAwaitable{}` gets converted into an unconditional
+// branch, the `co_await NormalAwaitable{}` is unreachable in optimized builds.
+//
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+// Template awaitable with `coro_destroyed_on_suspend` attribute
+template<typename T>
+struct TemplateDestroyingAwaitable {
+ bool await_ready() { return false; }
+
+ [[clang::coro_destroyed_on_suspend]]
+ void await_suspend(std::coroutine_handle<> h) {
+ h.destroy();
+ }
+
+ void await_resume() {}
+};
+
+Task test_template_destroying_await() {
+ co_await TemplateDestroyingAwaitable<int>{};
+}
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z30test_template_destroying_awaitv
+// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
+// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
+
+struct InvalidDestroyingAwaitable {
+ bool await_ready() { return false; }
+
+ // The attribute is ignored, since the function doesn't return `void`!
+ [[clang::coro_destroyed_on_suspend]]
+ bool await_suspend(std::coroutine_handle<> h) {
+ h.destroy();
+ return true;
+ }
+
+ void await_resume() {}
+};
+
+Task test_invalid_destroying_await() {
+ co_await InvalidDestroyingAwaitable{};
+}
+
+// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z29test_invalid_destroying_awaitv
+// CHECK-OPTIMIZED: call{{.*}} @_Znwm
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 05693538252aa..a02b97869adc9 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -62,6 +62,7 @@
// CHECK-NEXT: Convergent (SubjectMatchRule_function)
// CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
// CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter)
+// CHECK-NEXT: CoroDestroyedOnSuspend (SubjectMatchRule_function_is_member)
// CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
// CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
// CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)
``````````
</details>
https://github.com/llvm/llvm-project/pull/152029
More information about the cfe-commits
mailing list