[clang] f786881 - [coroutine] Implement llvm.coro.await.suspend intrinsic (#79712)

via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 10 19:00:08 PDT 2024


Author: fpasserby
Date: 2024-03-11T10:00:00+08:00
New Revision: f78688134026686288a8d310b493d9327753a022

URL: https://github.com/llvm/llvm-project/commit/f78688134026686288a8d310b493d9327753a022
DIFF: https://github.com/llvm/llvm-project/commit/f78688134026686288a8d310b493d9327753a022.diff

LOG: [coroutine] Implement llvm.coro.await.suspend intrinsic (#79712)

Implement the `llvm.coro.await.suspend` intrinsics to deal with the
performance regression after prohibiting `.await_suspend` inlining, as
suggested in #64945.
Actually, there are three new intrinsics, which directly correspond to
each of three forms of `await_suspend`:
```
void llvm.coro.await.suspend.void(ptr %awaiter, ptr %frame, ptr @wrapperFunction)
i1 llvm.coro.await.suspend.bool(ptr %awaiter, ptr %frame, ptr @wrapperFunction)
ptr llvm.coro.await.suspend.handle(ptr %awaiter, ptr %frame, ptr @wrapperFunction)
```
There are three different versions instead of one, because in the `bool`
case its result is used for resuming via a branch, and in the
`coroutine_handle` case exceptions from `await_suspend` are handled in
the coroutine, and exceptions from the subsequent `.resume()` are
propagated to the caller.

Await-suspend block is simplified down to intrinsic calls only, for
example for symmetric transfer:
```
%id = call token @llvm.coro.save(ptr null)
%handle = call ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %frame, ptr @wrapperFunction)
call void @llvm.coro.resume(%handle)
%result = call i8 @llvm.coro.suspend(token %id, i1 false)
switch i8 %result, ...
```
All await-suspend logic is moved out into a wrapper function, generated
for each suspension point.
The signature of the function is `<type> wrapperFunction(ptr %awaiter,
ptr %frame)` where `<type>` is one of `void`, `i1`, or `ptr`, depending on
the return type of `await_suspend`.
Intrinsic calls are lowered during `CoroSplit` pass, right after the
split.

Because I'm new to LLVM, I'm not sure if the helper function generation,
the calls to the helpers, and the lowering are implemented in the right way,
especially with regard to various metadata and attributes, e.g. TBAA. All
things that seemed questionable are marked with `FIXME` comments.

There is another detail: in the case of symmetric transfer, a raw pointer to
the frame of the coroutine that should be resumed is returned from the
helper function, and a direct call to `@llvm.coro.resume` is generated.
The C++ standard demands that the `.resume()` method is evaluated. I'm not
sure how important this is, because code has been generated in the same way
before, sans helper function.

Added: 
    llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
    llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll

Modified: 
    clang/include/clang/AST/ExprCXX.h
    clang/lib/CodeGen/CGCoroutine.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Sema/SemaCoroutine.cpp
    clang/test/AST/coroutine-locals-cleanup.cpp
    clang/test/CodeGenCoroutines/coro-always-inline.cpp
    clang/test/CodeGenCoroutines/coro-await.cpp
    clang/test/CodeGenCoroutines/coro-dwarf.cpp
    clang/test/CodeGenCoroutines/coro-function-try-block.cpp
    clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
    clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
    clang/test/CodeGenCoroutines/pr56329.cpp
    clang/test/CodeGenCoroutines/pr59181.cpp
    clang/test/CodeGenCoroutines/pr65054.cpp
    llvm/docs/Coroutines.rst
    llvm/include/llvm/IR/Intrinsics.td
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Transforms/Coroutines/CoroInstr.h
    llvm/lib/Transforms/Coroutines/CoroInternal.h
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp
    llvm/lib/Transforms/Coroutines/Coroutines.cpp

Removed: 
    clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp


################################################################################
diff  --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index a0e467b35778c5..6003b866c9f564 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -5038,6 +5038,9 @@ class CoroutineSuspendExpr : public Expr {
   OpaqueValueExpr *OpaqueValue = nullptr;
 
 public:
+  // These types correspond to the three C++ 'await_suspend' return variants
+  enum class SuspendReturnType { SuspendVoid, SuspendBool, SuspendHandle };
+
   CoroutineSuspendExpr(StmtClass SC, SourceLocation KeywordLoc, Expr *Operand,
                        Expr *Common, Expr *Ready, Expr *Suspend, Expr *Resume,
                        OpaqueValueExpr *OpaqueValue)
@@ -5097,6 +5100,24 @@ class CoroutineSuspendExpr : public Expr {
     return static_cast<Expr *>(SubExprs[SubExpr::Operand]);
   }
 
+  SuspendReturnType getSuspendReturnType() const {
+    auto *SuspendExpr = getSuspendExpr();
+    assert(SuspendExpr);
+
+    auto SuspendType = SuspendExpr->getType();
+
+    if (SuspendType->isVoidType())
+      return SuspendReturnType::SuspendVoid;
+    if (SuspendType->isBooleanType())
+      return SuspendReturnType::SuspendBool;
+
+    // Void pointer is the type of handle.address(), which is returned
+    // from the await suspend wrapper so that the temporary coroutine handle
+    // value won't go to the frame by mistake
+    assert(SuspendType->isVoidPointerType());
+    return SuspendReturnType::SuspendHandle;
+  }
+
   SourceLocation getKeywordLoc() const { return KeywordLoc; }
 
   SourceLocation getBeginLoc() const LLVM_READONLY { return KeywordLoc; }

diff  --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 888d30bfb3e1d6..b7142ec08af986 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -141,7 +141,7 @@ static bool FunctionCanThrow(const FunctionDecl *D) {
          Proto->canThrow() != CT_Cannot;
 }
 
-static bool ResumeStmtCanThrow(const Stmt *S) {
+static bool StmtCanThrow(const Stmt *S) {
   if (const auto *CE = dyn_cast<CallExpr>(S)) {
     const auto *Callee = CE->getDirectCallee();
     if (!Callee)
@@ -167,7 +167,7 @@ static bool ResumeStmtCanThrow(const Stmt *S) {
   }
 
   for (const auto *child : S->children())
-    if (ResumeStmtCanThrow(child))
+    if (StmtCanThrow(child))
       return true;
 
   return false;
@@ -178,18 +178,31 @@ static bool ResumeStmtCanThrow(const Stmt *S) {
 //   auto && x = CommonExpr();
 //   if (!x.await_ready()) {
 //      llvm_coro_save();
-//      x.await_suspend(...);     (*)
-//      llvm_coro_suspend(); (**)
+//      llvm_coro_await_suspend(&x, frame, wrapper) (*) (**)
+//      llvm_coro_suspend(); (***)
 //   }
 //   x.await_resume();
 //
 // where the result of the entire expression is the result of x.await_resume()
 //
-//   (*) If x.await_suspend return type is bool, it allows to veto a suspend:
+//   (*) llvm_coro_await_suspend_{void, bool, handle} is lowered to
+//      wrapper(&x, frame) when it's certain not to interfere with
+//      coroutine transform. await_suspend expression is
+//      asynchronous to the coroutine body and not all analyses
+//      and transformations can handle it correctly at the moment.
+//
+//      Wrapper function encapsulates x.await_suspend(...) call and looks like:
+//
+//      auto __await_suspend_wrapper(auto& awaiter, void* frame) {
+//        std::coroutine_handle<> handle(frame);
+//        return awaiter.await_suspend(handle);
+//      }
+//
+//  (**) If x.await_suspend return type is bool, it allows to veto a suspend:
 //      if (x.await_suspend(...))
 //        llvm_coro_suspend();
 //
-//  (**) llvm_coro_suspend() encodes three possible continuations as
+//  (***) llvm_coro_suspend() encodes three possible continuations as
 //       a switch instruction:
 //
 //  %where-to = call i8 @llvm.coro.suspend(...)
@@ -212,9 +225,10 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
                                     bool ignoreResult, bool forLValue) {
   auto *E = S.getCommonExpr();
 
-  auto Binder =
+  auto CommonBinder =
       CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
-  auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); });
+  auto UnbindCommonOnExit =
+      llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });
 
   auto Prefix = buildSuspendPrefixStr(Coro, Kind);
   BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
@@ -232,16 +246,73 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
   auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
 
+  auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
+      CGF.CurFn->getName(), Prefix, S);
+
   CGF.CurCoro.InSuspendBlock = true;
-  auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr());
+
+  assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
+         "expected to be called in coroutine context");
+
+  SmallVector<llvm::Value *, 3> SuspendIntrinsicCallArgs;
+  SuspendIntrinsicCallArgs.push_back(
+      CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF));
+
+  SuspendIntrinsicCallArgs.push_back(CGF.CurCoro.Data->CoroBegin);
+  SuspendIntrinsicCallArgs.push_back(SuspendWrapper);
+
+  const auto SuspendReturnType = S.getSuspendReturnType();
+  llvm::Intrinsic::ID AwaitSuspendIID;
+
+  switch (SuspendReturnType) {
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendVoid:
+    AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_void;
+    break;
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendBool:
+    AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_bool;
+    break;
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendHandle:
+    AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_handle;
+    break;
+  }
+
+  llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);
+
+  const auto AwaitSuspendCanThrow = StmtCanThrow(S.getSuspendExpr());
+
+  llvm::CallBase *SuspendRet = nullptr;
+  // FIXME: add call attributes?
+  if (AwaitSuspendCanThrow)
+    SuspendRet =
+        CGF.EmitCallOrInvoke(AwaitSuspendIntrinsic, SuspendIntrinsicCallArgs);
+  else
+    SuspendRet = CGF.EmitNounwindRuntimeCall(AwaitSuspendIntrinsic,
+                                             SuspendIntrinsicCallArgs);
+
+  assert(SuspendRet);
   CGF.CurCoro.InSuspendBlock = false;
 
-  if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) {
+  switch (SuspendReturnType) {
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendVoid:
+    assert(SuspendRet->getType()->isVoidTy());
+    break;
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendBool: {
+    assert(SuspendRet->getType()->isIntegerTy());
+
     // Veto suspension if requested by bool returning await_suspend.
     BasicBlock *RealSuspendBlock =
         CGF.createBasicBlock(Prefix + Twine(".suspend.bool"));
     CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock);
     CGF.EmitBlock(RealSuspendBlock);
+    break;
+  }
+  case CoroutineSuspendExpr::SuspendReturnType::SuspendHandle: {
+    assert(SuspendRet->getType()->isPointerTy());
+
+    auto ResumeIntrinsic = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_resume);
+    Builder.CreateCall(ResumeIntrinsic, SuspendRet);
+    break;
+  }
   }
 
   // Emit the suspend point.
@@ -267,7 +338,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   // is marked as 'noexcept', we avoid generating this additional IR.
   CXXTryStmt *TryStmt = nullptr;
   if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
-      ResumeStmtCanThrow(S.getResumeExpr())) {
+      StmtCanThrow(S.getResumeExpr())) {
     Coro.ResumeEHVar =
         CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
     Builder.CreateFlagStore(true, Coro.ResumeEHVar);
@@ -338,6 +409,69 @@ static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx,
 }
 #endif
 
+llvm::Function *
+CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
+                                             Twine const &SuspendPointName,
+                                             CoroutineSuspendExpr const &S) {
+  std::string FuncName = "__await_suspend_wrapper_";
+  FuncName += CoroName.str();
+  FuncName += '_';
+  FuncName += SuspendPointName.str();
+
+  ASTContext &C = getContext();
+
+  FunctionArgList args;
+
+  ImplicitParamDecl AwaiterDecl(C, C.VoidPtrTy, ImplicitParamKind::Other);
+  ImplicitParamDecl FrameDecl(C, C.VoidPtrTy, ImplicitParamKind::Other);
+  QualType ReturnTy = S.getSuspendExpr()->getType();
+
+  args.push_back(&AwaiterDecl);
+  args.push_back(&FrameDecl);
+
+  const CGFunctionInfo &FI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
+
+  llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
+
+  llvm::Function *Fn = llvm::Function::Create(
+      LTy, llvm::GlobalValue::PrivateLinkage, FuncName, &CGM.getModule());
+
+  Fn->addParamAttr(0, llvm::Attribute::AttrKind::NonNull);
+  Fn->addParamAttr(0, llvm::Attribute::AttrKind::NoUndef);
+
+  Fn->addParamAttr(1, llvm::Attribute::AttrKind::NoUndef);
+
+  Fn->setMustProgress();
+  Fn->addFnAttr(llvm::Attribute::AttrKind::AlwaysInline);
+
+  StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args);
+
+  // FIXME: add TBAA metadata to the loads
+  llvm::Value *AwaiterPtr = Builder.CreateLoad(GetAddrOfLocalVar(&AwaiterDecl));
+  auto AwaiterLValue =
+      MakeNaturalAlignAddrLValue(AwaiterPtr, AwaiterDecl.getType());
+
+  CurAwaitSuspendWrapper.FramePtr =
+      Builder.CreateLoad(GetAddrOfLocalVar(&FrameDecl));
+
+  auto AwaiterBinder = CodeGenFunction::OpaqueValueMappingData::bind(
+      *this, S.getOpaqueValue(), AwaiterLValue);
+
+  auto *SuspendRet = EmitScalarExpr(S.getSuspendExpr());
+
+  auto UnbindCommonOnExit =
+      llvm::make_scope_exit([&] { AwaiterBinder.unbind(*this); });
+  if (SuspendRet != nullptr) {
+    Fn->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
+    Builder.CreateStore(SuspendRet, ReturnValue);
+  }
+
+  CurAwaitSuspendWrapper.FramePtr = nullptr;
+  FinishFunction();
+  return Fn;
+}
+
 LValue
 CodeGenFunction::EmitCoawaitLValue(const CoawaitExpr *E) {
   assert(getCoroutineSuspendExprReturnType(getContext(), E)->isReferenceType() &&
@@ -834,6 +968,11 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
     if (CurCoro.Data && CurCoro.Data->CoroBegin) {
       return RValue::get(CurCoro.Data->CoroBegin);
     }
+
+    if (CurAwaitSuspendWrapper.FramePtr) {
+      return RValue::get(CurAwaitSuspendWrapper.FramePtr);
+    }
+
     CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin "
                                 "has been used earlier in this function");
     auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getPtrTy());

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06327a18471778..6c825a302913df 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -352,6 +352,25 @@ class CodeGenFunction : public CodeGenTypeCache {
     return isCoroutine() && CurCoro.InSuspendBlock;
   }
 
+  // Holds FramePtr for await_suspend wrapper generation,
+  // so that __builtin_coro_frame call can be lowered
+  // directly to value of its second argument
+  struct AwaitSuspendWrapperInfo {
+    llvm::Value *FramePtr = nullptr;
+  };
+  AwaitSuspendWrapperInfo CurAwaitSuspendWrapper;
+
+  // Generates wrapper function for `llvm.coro.await.suspend.*` intrinsics.
+  // It encapsulates SuspendExpr in a function, to separate its body
+  // from the main coroutine to avoid miscompilations. Intrinsic
+  // is lowered to this function call in CoroSplit pass
+  // Function signature is:
+  // <type> __await_suspend_wrapper_<name>(ptr %awaiter, ptr %hdl)
+  // where type is one of (void, i1, ptr)
+  llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName,
+                                              Twine const &SuspendPointName,
+                                              CoroutineSuspendExpr const &S);
+
   /// CurGD - The GlobalDecl for the current function being compiled.
   GlobalDecl CurGD;
 

diff  --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 301a5ff72a3b2a..5206fc7621c7cd 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -348,99 +348,15 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E,
 
   Expr *JustAddress = AddressExpr.get();
 
-  // FIXME: Without optimizations, the temporary result from `await_suspend()`
-  // may be put on the coroutine frame since the coroutine frame constructor
-  // will think the temporary variable will escape from the
-  // `coroutine_handle<>::address()` call. This is problematic since the
-  // coroutine should be considered to be suspended after it enters
-  // `await_suspend` so it shouldn't access/update the coroutine frame after
-  // that.
-  //
-  // See https://github.com/llvm/llvm-project/issues/65054 for the report.
-  //
-  // The long term solution may wrap the whole logic about `await-suspend`
-  // into a standalone function. This is similar to the proposed solution
-  // in tryMarkAwaitSuspendNoInline. See the comments there for details.
-  //
-  // The short term solution here is to mark `coroutine_handle<>::address()`
-  // function as always-inline so that the coroutine frame constructor won't
-  // think the temporary result is escaped incorrectly.
-  if (auto *FD = cast<CallExpr>(JustAddress)->getDirectCallee())
-    if (!FD->hasAttr<AlwaysInlineAttr>() && !FD->hasAttr<NoInlineAttr>())
-      FD->addAttr(AlwaysInlineAttr::CreateImplicit(S.getASTContext(),
-                                                   FD->getLocation()));
-
   // Check that the type of AddressExpr is void*
   if (!JustAddress->getType().getTypePtr()->isVoidPointerType())
     S.Diag(cast<CallExpr>(JustAddress)->getCalleeDecl()->getLocation(),
            diag::warn_coroutine_handle_address_invalid_return_type)
         << JustAddress->getType();
 
-  // Clean up temporary objects so that they don't live across suspension points
-  // unnecessarily. We choose to clean up before the call to
-  // __builtin_coro_resume so that the cleanup code are not inserted in-between
-  // the resume call and return instruction, which would interfere with the
-  // musttail call contract.
-  JustAddress = S.MaybeCreateExprWithCleanups(JustAddress);
-  return S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_resume,
-                                JustAddress);
-}
-
-/// The await_suspend call performed by co_await is essentially asynchronous
-/// to the execution of the coroutine. Inlining it normally into an unsplit
-/// coroutine can cause miscompilation because the coroutine CFG misrepresents
-/// the true control flow of the program: things that happen in the
-/// await_suspend are not guaranteed to happen prior to the resumption of the
-/// coroutine, and things that happen after the resumption of the coroutine
-/// (including its exit and the potential deallocation of the coroutine frame)
-/// are not guaranteed to happen only after the end of await_suspend.
-///
-/// See https://github.com/llvm/llvm-project/issues/56301 and
-/// https://reviews.llvm.org/D157070 for the example and the full discussion.
-///
-/// The short-term solution to this problem is to mark the call as uninlinable.
-/// But we don't want to do this if the call is known to be trivial, which is
-/// very common.
-///
-/// The long-term solution may introduce patterns like:
-///
-///  call @llvm.coro.await_suspend(ptr %awaiter, ptr %handle,
-///                                ptr @awaitSuspendFn)
-///
-/// Then it is much easier to perform the safety analysis in the middle end.
-/// If it is safe to inline the call to awaitSuspend, we can replace it in the
-/// CoroEarly pass. Otherwise we could replace it in the CoroSplit pass.
-static void tryMarkAwaitSuspendNoInline(Sema &S, OpaqueValueExpr *Awaiter,
-                                        CallExpr *AwaitSuspend) {
-  // The method here to extract the awaiter decl is not precise.
-  // This is intentional. Since it is hard to perform the analysis in the
-  // frontend due to the complexity of C++'s type systems.
-  // And we prefer to perform such analysis in the middle end since it is
-  // easier to implement and more powerful.
-  CXXRecordDecl *AwaiterDecl =
-      Awaiter->getType().getNonReferenceType()->getAsCXXRecordDecl();
-
-  if (AwaiterDecl && AwaiterDecl->field_empty())
-    return;
-
-  FunctionDecl *FD = AwaitSuspend->getDirectCallee();
-
-  assert(FD);
-
-  // If the `await_suspend()` function is marked as `always_inline` explicitly,
-  // we should give the user the right to control the codegen.
-  if (FD->hasAttr<NoInlineAttr>() || FD->hasAttr<AlwaysInlineAttr>())
-    return;
-
-  // This is problematic if the user calls the await_suspend standalone. But on
-  // the on hand, it is not incorrect semantically since inlining is not part
-  // of the standard. On the other hand, it is relatively rare to call
-  // the await_suspend function standalone.
-  //
-  // And given we've already had the long-term plan, the current workaround
-  // looks relatively tolerant.
-  FD->addAttr(
-      NoInlineAttr::CreateImplicit(S.getASTContext(), FD->getLocation()));
+  // Clean up temporary objects, because the resulting expression
+  // will become the body of await_suspend wrapper.
+  return S.MaybeCreateExprWithCleanups(JustAddress);
 }
 
 /// Build calls to await_ready, await_suspend, and await_resume for a co_await
@@ -514,10 +430,6 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise,
     //     type Z.
     QualType RetType = AwaitSuspend->getCallReturnType(S.Context);
 
-    // We need to mark await_suspend as noinline temporarily. See the comment
-    // of tryMarkAwaitSuspendNoInline for details.
-    tryMarkAwaitSuspendNoInline(S, Operand, AwaitSuspend);
-
     // Support for coroutine_handle returning await_suspend.
     if (Expr *TailCallSuspend =
             maybeTailCall(S, RetType, AwaitSuspend, Loc))

diff  --git a/clang/test/AST/coroutine-locals-cleanup.cpp b/clang/test/AST/coroutine-locals-cleanup.cpp
index ce106b8e230a10..6264df01fa2acb 100644
--- a/clang/test/AST/coroutine-locals-cleanup.cpp
+++ b/clang/test/AST/coroutine-locals-cleanup.cpp
@@ -90,10 +90,7 @@ Task bar() {
 // CHECK:                 ExprWithCleanups {{.*}} 'bool'
 // CHECK-NEXT:              CXXMemberCallExpr {{.*}} 'bool'
 // CHECK-NEXT:                MemberExpr {{.*}} .await_ready
-// CHECK:                 CallExpr {{.*}} 'void'
-// CHECK-NEXT:              ImplicitCastExpr {{.*}} 'void (*)(void *)'
-// CHECK-NEXT:                DeclRefExpr {{.*}} '__builtin_coro_resume' 'void (void *)'
-// CHECK-NEXT:              ExprWithCleanups {{.*}} 'void *'
+// CHECK:                 ExprWithCleanups {{.*}} 'void *'
 
 // CHECK:           CaseStmt
 // CHECK:             ExprWithCleanups {{.*}} 'void'
@@ -103,7 +100,4 @@ Task bar() {
 // CHECK:                 ExprWithCleanups {{.*}} 'bool'
 // CHECK-NEXT:              CXXMemberCallExpr {{.*}} 'bool'
 // CHECK-NEXT:                MemberExpr {{.*}} .await_ready
-// CHECK:                 CallExpr {{.*}} 'void'
-// CHECK-NEXT:              ImplicitCastExpr {{.*}} 'void (*)(void *)'
-// CHECK-NEXT:                DeclRefExpr {{.*}} '__builtin_coro_resume' 'void (void *)'
-// CHECK-NEXT:              ExprWithCleanups {{.*}} 'void *'
+// CHECK:                 ExprWithCleanups {{.*}} 'void *'

diff  --git a/clang/test/CodeGenCoroutines/coro-always-inline.cpp b/clang/test/CodeGenCoroutines/coro-always-inline.cpp
index 6e13a62fbd9865..d4f67a73f51726 100644
--- a/clang/test/CodeGenCoroutines/coro-always-inline.cpp
+++ b/clang/test/CodeGenCoroutines/coro-always-inline.cpp
@@ -34,7 +34,7 @@ struct coroutine_traits {
 // CHECK-LABEL: @_Z3foov
 // CHECK-LABEL: entry:
 // CHECK: %ref.tmp.reload.addr = getelementptr
-// CHECK: %ref.tmp4.reload.addr = getelementptr
+// CHECK: %ref.tmp3.reload.addr = getelementptr
 void foo() { co_return; }
 
 // Check that bar is not inlined even it's marked as always_inline.

diff  --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index dc5a765ccb83d7..75851d8805bb6e 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -71,16 +71,13 @@ extern "C" void f0() {
   // CHECK: [[SUSPEND_BB]]:
   // CHECK: %[[SUSPEND_ID:.+]] = call token @llvm.coro.save(
   // ---------------------------
-  // Build the coroutine handle and pass it to await_suspend
+  // Call coro.await.suspend
   // ---------------------------
-  // CHECK: call ptr @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr %[[FRAME]])
-  //   ... many lines of code to coerce coroutine_handle into an ptr scalar
-  // CHECK: %[[CH:.+]] = load ptr, ptr %{{.+}}
-  // CHECK: call void @_ZN14suspend_always13await_suspendESt16coroutine_handleIvE(ptr {{[^,]*}} %[[AWAITABLE]], ptr %[[CH]])
+  // CHECK-NEXT: call void @llvm.coro.await.suspend.void(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_f0_await)
   // -------------------------
   // Generate a suspend point:
   // -------------------------
-  // CHECK: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
+  // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
   // CHECK: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
   // CHECK:   i8 0, label %[[READY_BB]]
   // CHECK:   i8 1, label %[[CLEANUP_BB:.+]]
@@ -101,6 +98,17 @@ extern "C" void f0() {
   // CHECK-NEXT: call zeroext i1 @_ZN10final_susp11await_readyEv(ptr
   // CHECK: %[[FINALSP_ID:.+]] = call token @llvm.coro.save(
   // CHECK: call i8 @llvm.coro.suspend(token %[[FINALSP_ID]], i1 true)
+
+  // Await suspend wrapper
+  // CHECK: define{{.*}} @__await_suspend_wrapper_f0_await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
+  // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
+  // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
+  // CHECK: %[[FRAME:.+]] = load ptr, ptr %[[FRAME_TMP]]
+  // CHECK: call ptr @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr %[[FRAME]])
+  //   ... many lines of code to coerce coroutine_handle into an ptr scalar
+  // CHECK: %[[CH:.+]] = load ptr, ptr %{{.+}}
+  // CHECK: call void @_ZN14suspend_always13await_suspendESt16coroutine_handleIvE(ptr {{[^,]*}} %[[AWAITABLE]], ptr %[[CH]])
 }
 
 struct suspend_maybe {
@@ -131,7 +139,7 @@ extern "C" void f1(int) {
 
   // See if we need to suspend:
   // --------------------------
-  // CHECK: %[[READY:.+]] = call zeroext i1 @_ZN13suspend_maybe11await_readyEv(ptr {{[^,]*}} %[[AWAITABLE]])
+  // CHECK: %[[READY:.+]] = call zeroext i1 @_ZN13suspend_maybe11await_readyEv(ptr {{[^,]*}} %[[AWAITABLE:.+]])
   // CHECK: br i1 %[[READY]], label %[[READY_BB:.+]], label %[[SUSPEND_BB:.+]]
 
   // If we are suspending:
@@ -139,12 +147,9 @@ extern "C" void f1(int) {
   // CHECK: [[SUSPEND_BB]]:
   // CHECK: %[[SUSPEND_ID:.+]] = call token @llvm.coro.save(
   // ---------------------------
-  // Build the coroutine handle and pass it to await_suspend
+  // Call coro.await.suspend
   // ---------------------------
-  // CHECK: call ptr @_ZNSt16coroutine_handleINSt16coroutine_traitsIJviEE12promise_typeEE12from_addressEPv(ptr %[[FRAME]])
-  //   ... many lines of code to coerce coroutine_handle into an ptr scalar
-  // CHECK: %[[CH:.+]] = load ptr, ptr %{{.+}}
-  // CHECK: %[[YES:.+]] = call zeroext i1 @_ZN13suspend_maybe13await_suspendESt16coroutine_handleIvE(ptr {{[^,]*}} %[[AWAITABLE]], ptr %[[CH]])
+  // CHECK-NEXT: %[[YES:.+]] = call i1 @llvm.coro.await.suspend.bool(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_f1_yield)
   // -------------------------------------------
   // See if await_suspend decided not to suspend
   // -------------------------------------------
@@ -155,6 +160,18 @@ extern "C" void f1(int) {
 
   // CHECK: [[READY_BB]]:
   // CHECK:     call void @_ZN13suspend_maybe12await_resumeEv(ptr {{[^,]*}} %[[AWAITABLE]])
+
+  // Await suspend wrapper
+  // CHECK: define {{.*}} i1 @__await_suspend_wrapper_f1_yield(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
+  // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
+  // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
+  // CHECK: %[[FRAME:.+]] = load ptr, ptr %[[FRAME_TMP]]
+  // CHECK: call ptr @_ZNSt16coroutine_handleINSt16coroutine_traitsIJviEE12promise_typeEE12from_addressEPv(ptr %[[FRAME]])
+  //   ... many lines of code to coerce coroutine_handle into an ptr scalar
+  // CHECK: %[[CH:.+]] = load ptr, ptr %{{.+}}
+  // CHECK: %[[YES:.+]] = call zeroext i1 @_ZN13suspend_maybe13await_suspendESt16coroutine_handleIvE(ptr {{[^,]*}} %[[AWAITABLE]], ptr %[[CH]]) 
+  // CHECK-NEXT: ret i1 %[[YES]]
 }
 
 struct ComplexAwaiter {
@@ -340,11 +357,39 @@ struct TailCallAwait {
 
 // CHECK-LABEL: @TestTailcall(
 extern "C" void TestTailcall() {
+  // CHECK: %[[PROMISE:.+]] = alloca %"struct.std::coroutine_traits<void>::promise_type"
+  // CHECK: %[[FRAME:.+]] = call ptr @llvm.coro.begin(
   co_await TailCallAwait{};
+  // CHECK: %[[READY:.+]] = call zeroext i1 @_ZN13TailCallAwait11await_readyEv(ptr {{[^,]*}} %[[AWAITABLE:.+]])
+  // CHECK: br i1 %[[READY]], label %[[READY_BB:.+]], label %[[SUSPEND_BB:.+]]
 
-  // CHECK: %[[RESULT:.+]] = call ptr @_ZN13TailCallAwait13await_suspendESt16coroutine_handleIvE(ptr
-  // CHECK: %[[COERCE:.+]] = getelementptr inbounds %"struct.std::coroutine_handle", ptr %[[TMP:.+]], i32 0, i32 0
-  // CHECK: store ptr %[[RESULT]], ptr %[[COERCE]]
-  // CHECK: %[[ADDR:.+]] = call ptr @_ZNSt16coroutine_handleIvE7addressEv(ptr {{[^,]*}} %[[TMP]])
-  // CHECK: call void @llvm.coro.resume(ptr %[[ADDR]])
+  // If we are suspending:
+  // ---------------------
+  // CHECK: [[SUSPEND_BB]]:
+  // CHECK: %[[SUSPEND_ID:.+]] = call token @llvm.coro.save(
+  // ---------------------------
+  // Call coro.await.suspend
+  // ---------------------------
+  // CHECK-NEXT: %[[RESUMED:.+]] = call ptr @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
+  // CHECK-NEXT: call void @llvm.coro.resume(ptr %[[RESUMED]])
+  // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
+  // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
+  // CHECK-NEXT:   i8 0, label %[[READY_BB]]
+  // CHECK-NEXT:   i8 1, label %{{.+}}
+  // CHECK-NEXT: ]
+
+  // Await suspend wrapper
+  // CHECK: define {{.*}} ptr @__await_suspend_wrapper_TestTailcall_await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
+  // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
+  // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
+  // CHECK: %[[FRAME:.+]] = load ptr, ptr %[[FRAME_TMP]]
+  // CHECK: call ptr  @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(ptr %[[FRAME]])
+  //   ... many lines of code to coerce coroutine_handle into an ptr scalar
+  // CHECK: %[[CH:.+]] = load ptr, ptr %{{.+}}
+  // CHECK-NEXT: %[[RESULT:.+]] = call ptr @_ZN13TailCallAwait13await_suspendESt16coroutine_handleIvE(ptr {{[^,]*}} %[[AWAITABLE]], ptr %[[CH]]) 
+  // CHECK-NEXT: %[[COERCE:.+]] = getelementptr inbounds %"struct.std::coroutine_handle", ptr %[[TMP:.+]], i32 0, i32 0
+  // CHECK-NEXT: store ptr %[[RESULT]], ptr %[[COERCE]]
+  // CHECK-NEXT: %[[ADDR:.+]] = call ptr @_ZNSt16coroutine_handleIvE7addressEv(ptr {{[^,]*}} %[[TMP]])
+  // CHECK-NEXT: ret ptr %[[ADDR]]
 }

diff  --git a/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp b/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp
deleted file mode 100644
index f95286faf46ec8..00000000000000
--- a/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-// Tests that we can mark await-suspend as noinline correctly.
-//
-// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
-// RUN:     -O1 -disable-llvm-passes | FileCheck %s
-
-#include "Inputs/coroutine.h"
-
-struct Task {
-  struct promise_type {
-    struct FinalAwaiter {
-      bool await_ready() const noexcept { return false; }
-      template <typename PromiseType>
-      std::coroutine_handle<> await_suspend(std::coroutine_handle<PromiseType> h) noexcept {
-        return h.promise().continuation;
-      }
-      void await_resume() noexcept {}
-    };
-
-    Task get_return_object() noexcept {
-      return std::coroutine_handle<promise_type>::from_promise(*this);
-    }
-
-    std::suspend_always initial_suspend() noexcept { return {}; }
-    FinalAwaiter final_suspend() noexcept { return {}; }
-    void unhandled_exception() noexcept {}
-    void return_void() noexcept {}
-
-    std::coroutine_handle<> continuation;
-  };
-
-  Task(std::coroutine_handle<promise_type> handle);
-  ~Task();
-
-private:
-  std::coroutine_handle<promise_type> handle;
-};
-
-struct StatefulAwaiter {
-    int value;
-    bool await_ready() const noexcept { return false; }
-    template <typename PromiseType>
-    void await_suspend(std::coroutine_handle<PromiseType> h) noexcept {}
-    void await_resume() noexcept {}
-};
-
-typedef std::suspend_always NoStateAwaiter;
-using AnotherStatefulAwaiter = StatefulAwaiter;
-
-template <class T>
-struct TemplatedAwaiter {
-    T value;
-    bool await_ready() const noexcept { return false; }
-    template <typename PromiseType>
-    void await_suspend(std::coroutine_handle<PromiseType> h) noexcept {}
-    void await_resume() noexcept {}
-};
-
-
-class Awaitable {};
-StatefulAwaiter operator co_await(Awaitable) {
-  return StatefulAwaiter{};
-}
-
-StatefulAwaiter GlobalAwaiter;
-class Awaitable2 {};
-StatefulAwaiter& operator co_await(Awaitable2) {
-  return GlobalAwaiter;
-}
-
-struct AlwaysInlineStatefulAwaiter {
-    void* value;
-    bool await_ready() const noexcept { return false; }
-
-    template <typename PromiseType>
-    __attribute__((always_inline))
-    void await_suspend(std::coroutine_handle<PromiseType> h) noexcept {}
-
-    void await_resume() noexcept {}
-};
-
-Task testing() {
-    co_await std::suspend_always{};
-    co_await StatefulAwaiter{};
-    co_await AnotherStatefulAwaiter{};
-    
-    // Test lvalue case.
-    StatefulAwaiter awaiter;
-    co_await awaiter;
-
-    // The explicit call to await_suspend is not considered suspended.
-    awaiter.await_suspend(std::coroutine_handle<void>::from_address(nullptr));
-
-    co_await TemplatedAwaiter<int>{};
-    TemplatedAwaiter<int> TemplatedAwaiterInstace;
-    co_await TemplatedAwaiterInstace;
-
-    co_await Awaitable{};
-    co_await Awaitable2{};
-
-    co_await AlwaysInlineStatefulAwaiter{};
-}
-
-struct AwaitTransformTask {
-  struct promise_type {
-    struct FinalAwaiter {
-      bool await_ready() const noexcept { return false; }
-      template <typename PromiseType>
-      std::coroutine_handle<> await_suspend(std::coroutine_handle<PromiseType> h) noexcept {
-        return h.promise().continuation;
-      }
-      void await_resume() noexcept {}
-    };
-
-    AwaitTransformTask get_return_object() noexcept {
-      return std::coroutine_handle<promise_type>::from_promise(*this);
-    }
-
-    std::suspend_always initial_suspend() noexcept { return {}; }
-    FinalAwaiter final_suspend() noexcept { return {}; }
-    void unhandled_exception() noexcept {}
-    void return_void() noexcept {}
-
-    template <typename Awaitable>
-    auto await_transform(Awaitable &&awaitable) {
-      return awaitable;
-    }
-
-    std::coroutine_handle<> continuation;
-  };
-
-  AwaitTransformTask(std::coroutine_handle<promise_type> handle);
-  ~AwaitTransformTask();
-
-private:
-  std::coroutine_handle<promise_type> handle;
-};
-
-struct awaitableWithGetAwaiter {
-  bool await_ready() const noexcept { return false; }
-  template <typename PromiseType>
-  void await_suspend(std::coroutine_handle<PromiseType> h) noexcept {}
-  void await_resume() noexcept {}
-};
-
-AwaitTransformTask testingWithAwaitTransform() {
-  co_await awaitableWithGetAwaiter{};
-}
-
-// CHECK: define{{.*}}@_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE{{.*}}#[[NORMAL_ATTR:[0-9]+]]
-
-// CHECK: define{{.*}}@_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR:[0-9]+]]
-
-// CHECK: define{{.*}}@_ZN15StatefulAwaiter13await_suspendIvEEvSt16coroutine_handleIT_E{{.*}}#[[NORMAL_ATTR]]
-
-// CHECK: define{{.*}}@_ZN16TemplatedAwaiterIiE13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]]
-
-// CHECK: define{{.*}}@_ZN27AlwaysInlineStatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[ALWAYS_INLINE_ATTR:[0-9]+]]
-
-// CHECK: define{{.*}}@_ZN4Task12promise_type12FinalAwaiter13await_suspendIS0_EESt16coroutine_handleIvES3_IT_E{{.*}}#[[NORMAL_ATTR]]
-
-// CHECK: define{{.*}}@_ZN23awaitableWithGetAwaiter13await_suspendIN18AwaitTransformTask12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NORMAL_ATTR]]
-
-// CHECK: define{{.*}}@_ZN18AwaitTransformTask12promise_type12FinalAwaiter13await_suspendIS0_EESt16coroutine_handleIvES3_IT_E{{.*}}#[[NORMAL_ATTR]]
-
-// CHECK-NOT: attributes #[[NORMAL_ATTR]] = noinline
-// CHECK: attributes #[[NOINLINE_ATTR]] = {{.*}}noinline
-// CHECK-NOT: attributes #[[ALWAYS_INLINE_ATTR]] = {{.*}}noinline
-// CHECK: attributes #[[ALWAYS_INLINE_ATTR]] = {{.*}}alwaysinline

diff  --git a/clang/test/CodeGenCoroutines/coro-dwarf.cpp b/clang/test/CodeGenCoroutines/coro-dwarf.cpp
index 7914babe5483a4..2c9c827e6753d6 100644
--- a/clang/test/CodeGenCoroutines/coro-dwarf.cpp
+++ b/clang/test/CodeGenCoroutines/coro-dwarf.cpp
@@ -70,3 +70,15 @@ void f_coro(int val, MoveOnly moParam, MoveAndCopy mcParam) {
 // CHECK: !{{[0-9]+}} = !DILocalVariable(name: "moParam", arg: 2, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
 // CHECK: !{{[0-9]+}} = !DILocalVariable(name: "mcParam", arg: 3, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
 // CHECK: !{{[0-9]+}} = !DILocalVariable(name: "__promise",
+
+// CHECK: !{{[0-9]+}} = distinct !DISubprogram(linkageName: "__await_suspend_wrapper__Z6f_coroi8MoveOnly11MoveAndCopy_init"
+// CHECK-NEXT: !{{[0-9]+}} = !DIFile
+// CHECK-NEXT: !{{[0-9]+}} = !DISubroutineType
+// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 1,
+// CHECK-NEXT: !{{[0-9]+}} = !DILocation
+// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 2,
+
+// CHECK: !{{[0-9]+}} = distinct !DISubprogram(linkageName: "__await_suspend_wrapper__Z6f_coroi8MoveOnly11MoveAndCopy_final"
+// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 1,
+// CHECK-NEXT: !{{[0-9]+}} = !DILocation
+// CHECK-NEXT: !{{[0-9]+}} = !DILocalVariable(arg: 2,

diff  --git a/clang/test/CodeGenCoroutines/coro-function-try-block.cpp b/clang/test/CodeGenCoroutines/coro-function-try-block.cpp
index f609eb55b8e771..b7a796cc241af4 100644
--- a/clang/test/CodeGenCoroutines/coro-function-try-block.cpp
+++ b/clang/test/CodeGenCoroutines/coro-function-try-block.cpp
@@ -19,5 +19,5 @@ task f() try {
 }
 
 // CHECK-LABEL: define{{.*}} void @_Z1fv(
-// CHECK: call void @_ZNSt13suspend_never13await_suspendESt16coroutine_handleIvE(
+// CHECK: call void @llvm.coro.await.suspend.void(
 // CHECK: call void @_ZN4task12promise_type11return_voidEv(

diff  --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
index c0b9e9ee2c5581..da30e12c63cffb 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
@@ -50,10 +50,5 @@ detached_task foo() {
 // check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points.
 // CHECK-LABEL: final.suspend:
 // CHECK:         %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK:         call void @llvm.lifetime.start.p0(i64 8, ptr %[[HDL:.+]])
-// CHECK:         %[[CALL:.+]] = call ptr @_ZN13detached_task12promise_type13final_awaiter13await_suspendESt16coroutine_handleIS0_E(
-// CHECK:         %[[HDL_CAST2:.+]] = getelementptr inbounds %"struct.std::coroutine_handle.0", ptr %[[HDL]], i32 0, i32 0
-// CHECK:         store ptr %[[CALL]], ptr %[[HDL_CAST2]], align 8
-// CHECK:         %[[HDL_TRANSFER:.+]] = call noundef ptr @_ZNKSt16coroutine_handleIvE7addressEv(ptr noundef {{.*}}%[[HDL]])
-// CHECK:         call void @llvm.lifetime.end.p0(i64 8, ptr %[[HDL]])
+// CHECK:         %[[HDL_TRANSFER:.+]] = call ptr @llvm.coro.await.suspend.handle
 // CHECK:         call void @llvm.coro.resume(ptr %[[HDL_TRANSFER]])

diff  --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
index 890d55e41de953..ca6cf74115a3b1 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
@@ -89,10 +89,8 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE1_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr %[[TMP1:.+]])
-
-// CHECK:    call void @llvm.lifetime.end.p0(i64 8, ptr %[[TMP1]])
-// CHECK-NEXT:    call void @llvm.coro.resume
+// CHECK-NEXT:    %[[HANDLE1_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
+// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE1_PTR]])
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE1_AWAIT_READY]]
@@ -106,10 +104,8 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE2_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr %[[TMP2:.+]])
-
-// CHECK:    call void @llvm.lifetime.end.p0(i64 8, ptr %[[TMP2]])
-// CHECK-NEXT:    call void @llvm.coro.resume
+// CHECK-NEXT:    %[[HANDLE2_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
+// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE2_PTR]])
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE2_AWAIT_READY]]

diff  --git a/clang/test/CodeGenCoroutines/pr56329.cpp b/clang/test/CodeGenCoroutines/pr56329.cpp
index 31d4849af4e71e..ad8b1b990179c9 100644
--- a/clang/test/CodeGenCoroutines/pr56329.cpp
+++ b/clang/test/CodeGenCoroutines/pr56329.cpp
@@ -115,5 +115,9 @@ Task Outer() {
 // CHECK-NOT: _exit
 // CHECK: musttail call
 // CHECK: musttail call
+// CHECK: musttail call
 // CHECK-NEXT: ret void
+// CHECK-EMPTY:
+// CHECK-NEXT: unreachable:
+// CHECK-NEXT: unreachable
 // CHECK-NEXT: }

diff  --git a/clang/test/CodeGenCoroutines/pr59181.cpp b/clang/test/CodeGenCoroutines/pr59181.cpp
index 80f4634db25214..21e784e0031de3 100644
--- a/clang/test/CodeGenCoroutines/pr59181.cpp
+++ b/clang/test/CodeGenCoroutines/pr59181.cpp
@@ -52,9 +52,8 @@ void foo() {
 // CHECK-NEXT: load i8
 // CHECK-NEXT: trunc
 // CHECK-NEXT: store i1 false
-// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF:%ref.tmp[0-9]+]])
 
 // CHECK: await.suspend:{{.*}}
-// CHECK-NOT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF]])
-// CHECK: call void @_ZZN4Task12promise_type15await_transformES_EN10Suspension13await_suspendESt16coroutine_handleIvE
-// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF]])
+// CHECK-NOT: call void @llvm.lifetime
+// CHECK: call void @llvm.coro.await.suspend.void(
+// CHECK-NEXT: %{{[0-9]+}} = call i8 @llvm.coro.suspend(

diff  --git a/clang/test/CodeGenCoroutines/pr65054.cpp b/clang/test/CodeGenCoroutines/pr65054.cpp
index 834b71050f59ff..7af9c04fca180e 100644
--- a/clang/test/CodeGenCoroutines/pr65054.cpp
+++ b/clang/test/CodeGenCoroutines/pr65054.cpp
@@ -1,7 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 \
-// RUN:      -O0 -disable-llvm-passes -emit-llvm %s -o - \
-// RUN:      | FileCheck %s --check-prefix=FRONTEND
-
 // The output of O0 is highly redundant and hard to test. Also it is not good
 // limit the output of O0. So we test the optimized output from O0. The idea
 // is the optimizations shouldn't change the semantics of the program.
@@ -51,10 +47,7 @@ MyTask FooBar() {
   }
 }
 
-// FRONTEND: define{{.*}}@_ZNKSt16coroutine_handleIvE7addressEv{{.*}}#[[address_attr:[0-9]+]]
-// FRONTEND: attributes #[[address_attr]] = {{.*}}alwaysinline
-
 // CHECK-O0: define{{.*}}@_Z6FooBarv.resume
-// CHECK-O0: call{{.*}}@_ZN7Awaiter13await_suspendESt16coroutine_handleIvE
+// CHECK-O0: call{{.*}}@__await_suspend_wrapper__Z6FooBarv_await(
 // CHECK-O0-NOT: store
 // CHECK-O0: ret void

diff  --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index d6219d264e4a9f..83369d93c309a7 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -1744,6 +1744,266 @@ a call to ``llvm.coro.suspend.retcon`` after resuming abnormally.
 In a yield-once coroutine, it is undefined behavior if the coroutine
 executes a call to ``llvm.coro.suspend.retcon`` after resuming in any way.
 
+.. _coro.await.suspend.void:
+
+'llvm.coro.await.suspend.void' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+  declare void @llvm.coro.await.suspend.void(
+                ptr <awaiter>,
+                ptr <handle>,
+                ptr <await_suspend_function>)
+
+Overview:
+"""""""""
+
+The '``llvm.coro.await.suspend.void``' intrinsic encapsulates C++ 
+`await-suspend` block until it can't interfere with coroutine transform.
+
+The `await_suspend` block of `co_await` is essentially asynchronous
+to the execution of the coroutine. Inlining it normally into an unsplit
+coroutine can cause miscompilation because the coroutine CFG misrepresents
+the true control flow of the program: things that happen in the
+await_suspend are not guaranteed to happen prior to the resumption of the
+coroutine, and things that happen after the resumption of the coroutine
+(including its exit and the potential deallocation of the coroutine frame)
+are not guaranteed to happen only after the end of `await_suspend`.
+
+This version of intrinsic corresponds to 
+'``void awaiter.await_suspend(...)``' variant.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to `awaiter` object.
+
+The second argument is a pointer to the current coroutine's frame.
+
+The third argument is a pointer to the wrapper function encapsulating
+`await-suspend` logic. Its signature must be
+
+.. code-block:: llvm
+
+    declare void @await_suspend_function(ptr %awaiter, ptr %hdl)
+
+Semantics:
+""""""""""
+
+The intrinsic must be used between corresponding `coro.save`_ and 
+`coro.suspend`_ calls. It is lowered to a direct 
+`await_suspend_function` call during `CoroSplit`_ pass.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+  ; before lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    call void @llvm.coro.await.suspend.void(
+                ptr %awaiter,
+                ptr %hdl,
+                ptr @await_suspend_function)
+    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    ...
+
+  ; after lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    ; the call to await_suspend_function can be inlined
+    call void @await_suspend_function(
+                ptr %awaiter,
+                ptr %hdl)
+    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)   
+    ...
+
+  ; wrapper function example
+  define void @await_suspend_function(ptr %awaiter, ptr %hdl)
+    entry:
+      %hdl.arg = ... ; construct std::coroutine_handle from %hdl
+      call void @"Awaiter::await_suspend"(ptr %awaiter, ptr %hdl.arg)
+      ret void
+
+.. _coro.await.suspend.bool:
+
+'llvm.coro.await.suspend.bool' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+  declare i1 @llvm.coro.await.suspend.bool(
+                ptr <awaiter>,
+                ptr <handle>,
+                ptr <await_suspend_function>)
+
+Overview:
+"""""""""
+
+The '``llvm.coro.await.suspend.bool``' intrinsic encapsulates C++
+`await-suspend` block until it can't interfere with coroutine transform.
+
+The `await_suspend` block of `co_await` is essentially asynchronous
+to the execution of the coroutine. Inlining it normally into an unsplit
+coroutine can cause miscompilation because the coroutine CFG misrepresents
+the true control flow of the program: things that happen in the
+await_suspend are not guaranteed to happen prior to the resumption of the
+coroutine, and things that happen after the resumption of the coroutine
+(including its exit and the potential deallocation of the coroutine frame)
+are not guaranteed to happen only after the end of `await_suspend`.
+
+This version of intrinsic corresponds to 
+'``bool awaiter.await_suspend(...)``' variant.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to `awaiter` object.
+
+The second argument is a pointer to the current coroutine's frame.
+
+The third argument is a pointer to the wrapper function encapsulating
+`await-suspend` logic. Its signature must be
+
+.. code-block:: llvm
+
+    declare i1 @await_suspend_function(ptr %awaiter, ptr %hdl)
+
+Semantics:
+""""""""""
+
+The intrinsic must be used between corresponding `coro.save`_ and 
+`coro.suspend`_ calls. It is lowered to a direct 
+`await_suspend_function` call during `CoroSplit`_ pass.
+
+If `await_suspend_function` call returns `false`, the current coroutine is
+immediately resumed; if it returns `true`, the coroutine is suspended.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+  ; before lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    %resume = call i1 @llvm.coro.await.suspend.bool(
+                ptr %awaiter,
+                ptr %hdl,
+                ptr @await_suspend_function)
+    br i1 %resume, label %await.suspend.bool, label %await.ready
+  await.suspend.bool:
+    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    ...
+  await.ready:
+    call void @"Awaiter::await_resume"(ptr %awaiter)
+    ...
+
+  ; after lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    ; the call to await_suspend_function can be inlined
+    %resume = call i1 @await_suspend_function(
+                ptr %awaiter,
+                ptr %hdl)
+    br i1 %resume, label %await.suspend.bool, label %await.ready
+    ...
+
+  ; wrapper function example
+  define i1 @await_suspend_function(ptr %awaiter, ptr %hdl)
+    entry:
+      %hdl.arg = ... ; construct std::coroutine_handle from %hdl
+      %resume = call i1 @"Awaiter::await_suspend"(ptr %awaiter, ptr %hdl.arg)
+      ret i1 %resume
+
+.. _coro.await.suspend.handle:
+
+'llvm.coro.await.suspend.handle' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+  declare ptr @llvm.coro.await.suspend.handle(
+                ptr <awaiter>,
+                ptr <handle>,
+                ptr <await_suspend_function>)
+
+Overview:
+"""""""""
+
+The '``llvm.coro.await.suspend.handle``' intrinsic encapsulates C++
+`await-suspend` block until it can't interfere with coroutine transform.
+
+The `await_suspend` block of `co_await` is essentially asynchronous
+to the execution of the coroutine. Inlining it normally into an unsplit
+coroutine can cause miscompilation because the coroutine CFG misrepresents
+the true control flow of the program: things that happen in the
+await_suspend are not guaranteed to happen prior to the resumption of the
+coroutine, and things that happen after the resumption of the coroutine
+(including its exit and the potential deallocation of the coroutine frame)
+are not guaranteed to happen only after the end of `await_suspend`.
+
+This version of intrinsic corresponds to 
+'``std::coroutine_handle<> awaiter.await_suspend(...)``' variant.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to `awaiter` object.
+
+The second argument is a pointer to the current coroutine's frame.
+
+The third argument is a pointer to the wrapper function encapsulating
+`await-suspend` logic. Its signature must be
+
+.. code-block:: llvm
+
+    declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
+
+Semantics:
+""""""""""
+
+The intrinsic must be used between corresponding `coro.save`_ and 
+`coro.suspend`_ calls. It is lowered to a direct 
+`await_suspend_function` call during `CoroSplit`_ pass.
+
+`await_suspend_function` must return a pointer to a valid
+coroutine frame, which is immediately resumed.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+  ; before lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    %next = call ptr @llvm.coro.await.suspend.handle(
+                ptr %awaiter,
+                ptr %hdl,
+                ptr @await_suspend_function)
+    call void @llvm.coro.resume(ptr %next)
+    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    ...
+
+  ; after lowering
+  await.suspend:
+    %save = call token @llvm.coro.save(ptr %hdl)
+    ; the call to await_suspend_function can be inlined
+    %next = call ptr @await_suspend_function(
+                ptr %awaiter,
+                ptr %hdl)
+    call void @llvm.coro.resume(ptr %next)
+    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    ...
+
+  ; wrapper function example
+  define ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
+    entry:
+      %hdl.arg = ... ; construct std::coroutine_handle from %hdl
+      %hdl.raw = call ptr @"Awaiter::await_suspend"(ptr %awaiter, ptr %hdl.arg)
+      %hdl.result = ... ; get address of returned coroutine handle
+      ret ptr %hdl.result
+
 Coroutine Transformation Passes
 ===============================
 CoroEarly
@@ -1758,7 +2018,9 @@ and `coro.promise`_ intrinsics.
 CoroSplit
 ---------
 The pass CoroSplit builds coroutine frame and outlines resume and destroy parts
-into separate functions.
+into separate functions. This pass also lowers `coro.await.suspend.void`_,
+`coro.await.suspend.bool`_ and `coro.await.suspend.handle`_ intrinsics.
+
 
 CoroElide
 ---------

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index c2c0f74c315bab..144298fd7c0162 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1692,6 +1692,18 @@ def int_coro_promise : Intrinsic<[llvm_ptr_ty],
                                  [llvm_ptr_ty, llvm_i32_ty, llvm_i1_ty],
                                  [IntrNoMem, NoCapture<ArgIndex<0>>]>;
 
+def int_coro_await_suspend_void : Intrinsic<[],
+                                       [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+                                       [Throws]>;
+
+def int_coro_await_suspend_bool : Intrinsic<[llvm_i1_ty],
+                                            [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+                                            [Throws]>;
+
+def int_coro_await_suspend_handle : Intrinsic<[llvm_ptr_ty],
+                                              [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+                                              [Throws]>;
+
 // Coroutine Lowering Intrinsics. Used internally by coroutine passes.
 
 def int_coro_subfn_addr : DefaultAttrsIntrinsic<

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3cf5e81efb3b1a..ce090c3b8a7444 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4999,6 +4999,9 @@ void Verifier::visitInstruction(Instruction &I) {
                 F->getIntrinsicID() == Intrinsic::seh_scope_end ||
                 F->getIntrinsicID() == Intrinsic::coro_resume ||
                 F->getIntrinsicID() == Intrinsic::coro_destroy ||
+                F->getIntrinsicID() == Intrinsic::coro_await_suspend_void ||
+                F->getIntrinsicID() == Intrinsic::coro_await_suspend_bool ||
+                F->getIntrinsicID() == Intrinsic::coro_await_suspend_handle ||
                 F->getIntrinsicID() ==
                     Intrinsic::experimental_patchpoint_void ||
                 F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||

diff  --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index f01aa58eb89996..79e745bb162cdb 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -78,6 +78,39 @@ class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
   }
 };
 
+/// This represents the llvm.coro.await.suspend instruction.
+// FIXME: add callback metadata
+// FIXME: make a proper IntrinsicInst. Currently this is not possible,
+// because llvm.coro.await.suspend can be invoked.
+class LLVM_LIBRARY_VISIBILITY CoroAwaitSuspendInst : public CallBase {
+  enum { AwaiterArg, FrameArg, WrapperArg };
+
+public:
+  Value *getAwaiter() const { return getArgOperand(AwaiterArg); }
+
+  Value *getFrame() const { return getArgOperand(FrameArg); }
+
+  Function *getWrapperFunction() const {
+    return cast<Function>(getArgOperand(WrapperArg));
+  }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const CallBase *CB) {
+    if (const Function *CF = CB->getCalledFunction()) {
+      auto IID = CF->getIntrinsicID();
+      return IID == Intrinsic::coro_await_suspend_void ||
+             IID == Intrinsic::coro_await_suspend_bool ||
+             IID == Intrinsic::coro_await_suspend_handle;
+    }
+
+    return false;
+  }
+
+  static bool classof(const Value *V) {
+    return isa<CallBase>(V) && classof(cast<CallBase>(V));
+  }
+};
+
 /// This represents a common base class for llvm.coro.id instructions.
 class LLVM_LIBRARY_VISIBILITY AnyCoroIdInst : public IntrinsicInst {
 public:

diff  --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 388cf8d2aee71c..09d1430b4c559e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -84,6 +84,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   SmallVector<CoroAlignInst *, 2> CoroAligns;
   SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
   SmallVector<CallInst*, 2> SwiftErrorOps;
+  SmallVector<CoroAwaitSuspendInst *, 4> CoroAwaitSuspends;
 
   // Field indexes for special fields in the switch lowering.
   struct SwitchFieldIndex {

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 99675aa495f531..58b95e43b8994e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -167,6 +167,55 @@ class CoroCloner {
 
 } // end anonymous namespace
 
+// FIXME:
+// Lower the intrinsic in CoroEarly phase if coroutine frame doesn't escape
+// and it is known that other transformations, for example, sanitizers
+// won't lead to incorrect code.
+static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
+  auto Wrapper = CB->getWrapperFunction();
+  auto Awaiter = CB->getAwaiter();
+  auto FramePtr = CB->getFrame();
+
+  Builder.SetInsertPoint(CB);
+
+  CallBase *NewCall = nullptr;
+  // await_suspend has only 2 parameters, awaiter and handle.
+  // Copy parameter attributes from the intrinsic call, but remove the last,
+  // because the last parameter now becomes the function that is being called.
+  AttributeList NewAttributes =
+      CB->getAttributes().removeParamAttributes(CB->getContext(), 2);
+
+  if (auto Invoke = dyn_cast<InvokeInst>(CB)) {
+    auto WrapperInvoke =
+        Builder.CreateInvoke(Wrapper, Invoke->getNormalDest(),
+                             Invoke->getUnwindDest(), {Awaiter, FramePtr});
+
+    WrapperInvoke->setCallingConv(Invoke->getCallingConv());
+    std::copy(Invoke->bundle_op_info_begin(), Invoke->bundle_op_info_end(),
+              WrapperInvoke->bundle_op_info_begin());
+    WrapperInvoke->setAttributes(NewAttributes);
+    WrapperInvoke->setDebugLoc(Invoke->getDebugLoc());
+    NewCall = WrapperInvoke;
+  } else if (auto Call = dyn_cast<CallInst>(CB)) {
+    auto WrapperCall = Builder.CreateCall(Wrapper, {Awaiter, FramePtr});
+
+    WrapperCall->setAttributes(NewAttributes);
+    WrapperCall->setDebugLoc(Call->getDebugLoc());
+    NewCall = WrapperCall;
+  } else {
+    llvm_unreachable("Unexpected coro_await_suspend invocation method");
+  }
+
+  CB->replaceAllUsesWith(NewCall);
+  CB->eraseFromParent();
+}
+
+static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
+  IRBuilder<> Builder(F.getContext());
+  for (auto *AWS : Shape.CoroAwaitSuspends)
+    lowerAwaitSuspend(Builder, AWS);
+}
+
 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
                                    const coro::Shape &Shape, Value *FramePtr,
                                    CallGraph *CG) {
@@ -2025,6 +2074,8 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
   if (!Shape.CoroBegin)
     return Shape;
 
+  lowerAwaitSuspends(F, Shape);
+
   simplifySuspendPoints(Shape);
   buildCoroutineFrame(F, Shape, TTI, MaterializableCallback);
   replaceFrameSizeAndAlignment(Shape);

diff  --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 7bd151ed4dc1e3..a1c78d6a44ef46 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -67,6 +67,9 @@ static const char *const CoroIntrinsics[] = {
     "llvm.coro.async.resume",
     "llvm.coro.async.size.replace",
     "llvm.coro.async.store_resume",
+    "llvm.coro.await.suspend.bool",
+    "llvm.coro.await.suspend.handle",
+    "llvm.coro.await.suspend.void",
     "llvm.coro.begin",
     "llvm.coro.destroy",
     "llvm.coro.done",
@@ -174,7 +177,11 @@ void coro::Shape::buildFrom(Function &F) {
   SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
 
   for (Instruction &I : instructions(F)) {
-    if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+    // FIXME: coro_await_suspend_* are not proper `IntrinsicInst`s
+    // because they might be invoked
+    if (auto AWS = dyn_cast<CoroAwaitSuspendInst>(&I)) {
+      CoroAwaitSuspends.push_back(AWS);
+    } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
       switch (II->getIntrinsicID()) {
       default:
         continue;

diff  --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
new file mode 100644
index 00000000000000..fbc4a2c006f84e
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
@@ -0,0 +1,123 @@
+; Tests that invoke <type> @llvm.coro.await.suspend lowers to invoke @helper
+; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),simplifycfg' -S | FileCheck %s
+
+%Awaiter = type {}
+
+; CHECK:     define {{[^@]*}} @f.resume(ptr {{[^%]*}} %[[HDL:.+]])
+; CHECK:       %[[AWAITER:.+]] = getelementptr inbounds %f.Frame, ptr %[[HDL]], i32 0, i32 0
+define void @f() presplitcoroutine personality i32 0 {
+entry:
+  %awaiter = alloca %Awaiter
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  ; Initial suspend so that all 3 await_suspend invocations are inside f.resume
+  %suspend.init = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.init, label %ret [
+    i8 0, label %step
+    i8 1, label %cleanup
+  ]
+
+; CHECK:        invoke void @await_suspend_wrapper_void(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:       to label %[[STEP_CONT:[^ ]+]] unwind label %[[PAD:[^ ]+]]
+step:
+  %save = call token @llvm.coro.save(ptr null)
+  invoke void @llvm.coro.await.suspend.void(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_void)
+    to label %step.continue unwind label %pad
+
+; CHECK:      [[STEP_CONT]]:
+step.continue:
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %ret [
+    i8 0, label %step1
+    i8 1, label %cleanup
+  ]
+
+; CHECK:        %[[RESUME:.+]] = invoke i1 @await_suspend_wrapper_bool(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:       to label %[[STEP1_CONT:[^ ]+]] unwind label %[[PAD]]
+step1:
+  %save1 = call token @llvm.coro.save(ptr null)
+  %resume.bool = invoke i1 @llvm.coro.await.suspend.bool(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_bool)
+    to label %step1.continue unwind label %pad
+
+; CHECK:      [[STEP1_CONT]]:
+; CHECK-NEXT:   br i1 %[[RESUME]], label %{{[^,]+}}, label %[[STEP2:.+]]
+step1.continue:
+  br i1 %resume.bool, label %suspend.cond, label %step2
+
+suspend.cond:
+  %suspend1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
+  switch i8 %suspend1, label %ret [
+    i8 0, label %step2
+    i8 1, label %cleanup
+  ]
+
+; CHECK:      [[STEP2]]:
+; CHECK:        %[[NEXT_HDL:.+]] = invoke ptr @await_suspend_wrapper_handle(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:       to label %[[STEP2_CONT:[^ ]+]] unwind label %[[PAD]]
+step2:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %resume.handle = invoke ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
+    to label %step2.continue unwind label %pad
+
+; CHECK:      [[STEP2_CONT]]:
+; CHECK-NEXT:   %[[NEXT_RESUME:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[NEXT_HDL]], i8 0)
+; CHECK-NEXT:   musttail call {{.*}} void %[[NEXT_RESUME]](ptr %[[NEXT_HDL]])
+step2.continue:
+  call void @llvm.coro.resume(ptr %resume.handle)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %ret [
+    i8 0, label %step3
+    i8 1, label %cleanup
+  ]
+
+step3:
+  br label %cleanup
+
+pad:
+  %lp = landingpad { ptr, i32 }
+          catch ptr null
+  %exn = extractvalue { ptr, i32 } %lp, 0
+  call ptr @__cxa_begin_catch(ptr %exn)
+  call void @__cxa_end_catch()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %ret
+
+ret:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0, token none)
+  ret void
+}
+
+; check that we haven't accidentally gone outside of the @f.resume body
+; CHECK-LABEL: @f.destroy(
+; CHECK-LABEL: @f.cleanup(
+
+declare void @await_suspend_wrapper_void(ptr, ptr)
+declare i1 @await_suspend_wrapper_bool(ptr, ptr)
+declare ptr @await_suspend_wrapper_handle(ptr, ptr)
+
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(ptr)
+declare void @llvm.coro.destroy(ptr)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
+declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
+declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare i1 @llvm.coro.end(ptr, i1, token)
+
+declare ptr @__cxa_begin_catch(ptr)
+declare void @use_val(i32)
+declare void @__cxa_end_catch()
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)

diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
new file mode 100644
index 00000000000000..0f574c4acc26e7
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
@@ -0,0 +1,96 @@
+; Tests lowerings of different versions of coro.await.suspend
+; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),simplifycfg' -S | FileCheck %s
+
+%Awaiter = type {}
+
+; CHECK:     define {{[^@]*}} @f.resume(ptr {{[^%]*}} %[[HDL:.+]])
+; CHECK:       %[[AWAITER:.+]] = getelementptr inbounds %f.Frame, ptr %[[HDL]], i32 0, i32 0
+define void @f() presplitcoroutine {
+entry:
+  %awaiter = alloca %Awaiter
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %suspend.init = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %suspend.init, label %ret [
+    i8 0, label %step
+    i8 1, label %cleanup
+  ]
+
+; CHECK:        call void @await_suspend_wrapper_void(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:   br label %{{.*}}
+step:
+  %save = call token @llvm.coro.save(ptr null)
+  call void @llvm.coro.await.suspend.void(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_void)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %ret [
+    i8 0, label %step1
+    i8 1, label %cleanup
+  ]
+
+; CHECK:        %[[RESUME:.+]] = call i1 @await_suspend_wrapper_bool(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:   br i1 %[[RESUME]], label %{{[^,]+}}, label %[[STEP2:.+]]
+step1:
+  %save1 = call token @llvm.coro.save(ptr null)
+  %resume.bool = call i1 @llvm.coro.await.suspend.bool(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_bool)
+  br i1 %resume.bool, label %suspend.cond, label %step2
+
+suspend.cond:
+  %suspend1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
+  switch i8 %suspend1, label %ret [
+    i8 0, label %step2
+    i8 1, label %cleanup
+  ]
+
+; CHECK:      [[STEP2]]:
+; CHECK:        %[[NEXT_HDL:.+]] = call ptr @await_suspend_wrapper_handle(ptr %[[AWAITER]], ptr %[[HDL]])
+; CHECK-NEXT:   %[[CONT:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[NEXT_HDL]], i8 0)
+; CHECK-NEXT:   musttail call {{.*}} void %[[CONT]](ptr %[[NEXT_HDL]])
+step2:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %resume.handle = call ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
+  call void @llvm.coro.resume(ptr %resume.handle)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %ret [
+    i8 0, label %step3
+    i8 1, label %cleanup
+  ]
+
+step3:
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %ret
+
+ret:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0, token none)
+  ret void
+}
+
+; check that we haven't accidentally gone outside of the @f.resume body
+; CHECK-LABEL: @f.destroy(
+; CHECK-LABEL: @f.cleanup(
+
+declare void @await_suspend_wrapper_void(ptr, ptr)
+declare i1 @await_suspend_wrapper_bool(ptr, ptr)
+declare ptr @await_suspend_wrapper_handle(ptr, ptr)
+
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(ptr)
+declare void @llvm.coro.destroy(ptr)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
+declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
+declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare i1 @llvm.coro.end(ptr, i1, token)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)


        


More information about the cfe-commits mailing list