[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 24 05:42:09 PDT 2024


https://github.com/zmodem updated https://github.com/llvm/llvm-project/pull/89751

>From 33b07efe6d68cb4d17e96349b552ef5e5901d8c6 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 26 Mar 2024 15:04:35 +0100
Subject: [PATCH 1/4] stuff

---
 clang/lib/CodeGen/CGCoroutine.cpp             |   5 +-
 clang/test/CodeGenCoroutines/coro-await.cpp   |   3 +-
 .../coro-symmetric-transfer-01.cpp            |   4 +-
 .../coro-symmetric-transfer-02.cpp            |   6 +-
 llvm/docs/Coroutines.rst                      |  19 +-
 llvm/docs/LangRef.rst                         |   2 +-
 llvm/include/llvm/IR/Intrinsics.td            |   2 +-
 llvm/lib/Transforms/Coroutines/CoroInstr.h    |   4 +-
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   3 +-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 234 +++++-------------
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |   2 +-
 .../Coro/coro-split-musttail.ll               |  63 -----
 .../Coro/coro-split-musttail1.ll              |  97 --------
 .../Coro/coro-split-musttail10.ll             |  55 ----
 .../Coro/coro-split-musttail11.ll             |  55 ----
 .../Coro/coro-split-musttail12.ll             |  85 -------
 .../Coro/coro-split-musttail13.ll             |  76 ------
 .../Coro/coro-split-musttail2.ll              |  68 -----
 .../Coro/coro-split-musttail3.ll              |  91 -------
 .../Coro/coro-split-musttail4.ll              |  66 -----
 .../Coro/coro-split-musttail5.ll              |  63 -----
 .../Coro/coro-split-musttail6.ll              | 112 ---------
 .../Coro/coro-split-musttail7.ll              | 115 ---------
 .../coro-await-suspend-lower-invoke.ll        |   5 +-
 .../Coroutines/coro-await-suspend-lower.ll    |   5 +-
 .../Coroutines/coro-preserve-final.ll         | 131 ----------
 ...-split-musttail-chain-pgo-counter-promo.ll |   9 +-
 .../Coroutines/coro-split-musttail.ll         |  17 +-
 .../Coroutines/coro-split-musttail1.ll        |  32 ++-
 .../Coroutines/coro-split-musttail10.ll       |  13 +-
 .../Coroutines/coro-split-musttail11.ll       |  55 ----
 .../Coroutines/coro-split-musttail2.ll        |  12 +-
 .../Coroutines/coro-split-musttail3.ll        |  33 ++-
 .../Coroutines/coro-split-musttail4.ll        |   5 +-
 .../Coroutines/coro-split-musttail5.ll        |   5 +-
 .../Coroutines/coro-split-musttail6.ll        |   9 +-
 .../Coroutines/coro-split-musttail7.ll        |  15 +-
 37 files changed, 157 insertions(+), 1419 deletions(-)
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
 delete mode 100644 llvm/test/Transforms/Coroutines/coro-preserve-final.ll
 delete mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail11.ll

diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 93ca711f716fce..e976734898b9b8 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -307,10 +307,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
     break;
   }
   case CoroutineSuspendExpr::SuspendReturnType::SuspendHandle: {
-    assert(SuspendRet->getType()->isPointerTy());
-
-    auto ResumeIntrinsic = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_resume);
-    Builder.CreateCall(ResumeIntrinsic, SuspendRet);
+    assert(SuspendRet->getType()->isVoidTy());
     break;
   }
   }
diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 75851d8805bb6e..7caaa6351844b2 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -370,8 +370,7 @@ extern "C" void TestTailcall() {
   // ---------------------------
   // Call coro.await.suspend
   // ---------------------------
-  // CHECK-NEXT: %[[RESUMED:.+]] = call ptr @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
-  // CHECK-NEXT: call void @llvm.coro.resume(ptr %[[RESUMED]])
+  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
   // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
   // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
   // CHECK-NEXT:   i8 0, label %[[READY_BB]]
diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
index da30e12c63cffb..0ae672de391c47 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
@@ -48,7 +48,7 @@ detached_task foo() {
 }
 
 // check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points.
+// XXX: not sure this makes sense anymore?
 // CHECK-LABEL: final.suspend:
 // CHECK:         %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK:         %[[HDL_TRANSFER:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK:         call void @llvm.coro.resume(ptr %[[HDL_TRANSFER]])
+// CHECK:         call void @llvm.coro.await.suspend.handle
diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
index ca6cf74115a3b1..f36f89926505f3 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
@@ -89,8 +89,7 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE1_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    %[[HANDLE1_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE1_PTR]])
+// CHECK-NEXT:    call void @llvm.coro.await.suspend.handle
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE1_AWAIT_READY]]
@@ -104,8 +103,7 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE2_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    %[[HANDLE2_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE2_PTR]])
+// CHECK-NEXT:    call void @llvm.coro.await.suspend.handle
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE2_AWAIT_READY]]
diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index 83369d93c309a7..36092325e536fb 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -1922,7 +1922,7 @@ Example:
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 ::
 
-  declare ptr @llvm.coro.await.suspend.handle(
+  declare void @llvm.coro.await.suspend.handle(
                 ptr <awaiter>,
                 ptr <handle>,
                 ptr <await_suspend_function>)
@@ -1967,7 +1967,9 @@ The intrinsic must be used between corresponding `coro.save`_ and
 `await_suspend_function` call during `CoroSplit`_ pass.
 
 `await_suspend_function` must return a pointer to a valid
-coroutine frame, which is immediately resumed
+coroutine frame. The intrinsic will be lowered to a tail call resuming the
+returned coroutine frame. It will be marked `musttail` on targets that support
+it. Instructions following the intrinsic will become unreachable.
 
 Example:
 """"""""
@@ -1977,11 +1979,10 @@ Example:
   ; before lowering
   await.suspend:
     %save = call token @llvm.coro.save(ptr %hdl)
-    %next = call ptr @llvm.coro.await.suspend.handle(
-                ptr %awaiter,
-                ptr %hdl,
-                ptr @await_suspend_function)
-    call void @llvm.coro.resume(%next)
+    call void @llvm.coro.await.suspend.handle(
+        ptr %awaiter,
+        ptr %hdl,
+        ptr @await_suspend_function)
     %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
     ...
 
@@ -1992,8 +1993,8 @@ Example:
     %next = call ptr @await_suspend_function(
                 ptr %awaiter,
                 ptr %hdl)
-    call void @llvm.coro.resume(%next)
-    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    musttail call void @llvm.coro.resume(%next)
+    ret void
     ...
 
   ; wrapper function example
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9592929d79feb4..0e87a8e2ace0e2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12517,7 +12517,7 @@ This instruction requires several arguments:
       ``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention
       is ``tailcc``
    -  `Platform-specific constraints are
-      met. <CodeGenerator.html#tailcallopt>`_
+      met. <CodeGenerator.html#tail-call-optimization>`_
 
 #. The optional ``notail`` marker indicates that the optimizers should not add
    ``tail`` or ``musttail`` markers to the call. It is used to prevent tail
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 1d20f7e1b19854..2bbdd0e4627c62 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1711,7 +1711,7 @@ def int_coro_await_suspend_bool : Intrinsic<[llvm_i1_ty],
                                             [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
                                             [Throws]>;
 
-def int_coro_await_suspend_handle : Intrinsic<[llvm_ptr_ty],
+def int_coro_await_suspend_handle : Intrinsic<[],
                                               [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
                                               [Throws]>;
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 79e745bb162cdb..a31703fe01304c 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -78,10 +78,10 @@ class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
   }
 };
 
-/// This represents the llvm.coro.await.suspend instruction.
+/// This represents the llvm.coro.await.suspend.{void,bool,handle} instructions.
 // FIXME: add callback metadata
 // FIXME: make a proper IntrinisicInst. Currently this is not possible,
-// because llvm.coro.await.suspend can be invoked.
+// because llvm.coro.await.suspend.* can be invoked.
 class LLVM_LIBRARY_VISIBILITY CoroAwaitSuspendInst : public CallBase {
   enum { AwaiterArg, FrameArg, WrapperArg };
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 84fd88806154e3..5716fd0ea4ab96 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -47,7 +47,7 @@ struct LowererBase {
   ConstantPointerNull *const NullPtr;
 
   LowererBase(Module &M);
-  Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
+  CallInst *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
 };
 
 enum class ABI {
@@ -85,6 +85,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
   SmallVector<CallInst*, 2> SwiftErrorOps;
   SmallVector<CoroAwaitSuspendInst *, 4> CoroAwaitSuspends;
+  SmallVector<CallInst *, 2> SymmetricTransfers;
 
   // Field indexes for special fields in the switch lowering.
   struct SwitchFieldIndex {
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 3a43b1edcaba37..01eb75617e39fe 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -113,21 +113,24 @@ class CoroCloner {
   /// ABIs.
   AnyCoroSuspendInst *ActiveSuspend = nullptr;
 
+  TargetTransformInfo &TTI;
+
 public:
   /// Create a cloner for a switch lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             Kind FKind)
+             Kind FKind, TargetTransformInfo &TTI)
       : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
-        Builder(OrigF.getContext()) {
+        Builder(OrigF.getContext()), TTI(TTI) {
     assert(Shape.ABI == coro::ABI::Switch);
   }
 
   /// Create a cloner for a continuation lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
+             Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
+             TargetTransformInfo &TTI)
       : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
         FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
-        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
+        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) {
     assert(Shape.ABI == coro::ABI::Retcon ||
            Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
     assert(NewF && "need existing function for continuation");
@@ -171,7 +174,8 @@ class CoroCloner {
 // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape
 // and it is known that other transformations, for example, sanitizers
 // won't lead to incorrect code.
-static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
+static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
+                              coro::Shape &Shape) {
   auto Wrapper = CB->getWrapperFunction();
   auto Awaiter = CB->getAwaiter();
   auto FramePtr = CB->getFrame();
@@ -206,6 +210,28 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
     llvm_unreachable("Unexpected coro_await_suspend invocation method");
   }
 
+  if (CB->getCalledFunction()->getIntrinsicID() ==
+      Intrinsic::coro_await_suspend_handle) {
+    // Follow the await_suspend call with a lowered resume call to the returned coroutine.
+    if (auto *Invoke = dyn_cast<InvokeInst>(CB))
+      Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
+
+    coro::LowererBase LB(*Wrapper->getParent());
+    auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
+                                        &*Builder.GetInsertPoint());
+
+    LLVMContext& Ctx = Builder.getContext();
+    FunctionType *ResumeTy = FunctionType::get(
+        Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
+    auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
+
+    // We can't insert the 'ret' instruction and adjust the cc until the
+    // function has been split, so remember this for later.
+    Shape.SymmetricTransfers.push_back(ResumeCall);
+
+    NewCall = ResumeCall;
+  }
+
   CB->replaceAllUsesWith(NewCall);
   CB->eraseFromParent();
 }
@@ -213,7 +239,7 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
 static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
   IRBuilder<> Builder(F.getContext());
   for (auto *AWS : Shape.CoroAwaitSuspends)
-    lowerAwaitSuspend(Builder, AWS);
+    lowerAwaitSuspend(Builder, AWS, Shape);
 }
 
 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
@@ -1056,6 +1082,22 @@ void CoroCloner::create() {
   // Set up the new entry block.
   replaceEntryBlock();
 
+  // Turn symmetric transfers into musttail calls.
+  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
+    ResumeCall = cast<CallInst>(VMap[ResumeCall]);
+    ResumeCall->setCallingConv(NewF->getCallingConv());
+    if (TTI.supportsTailCallFor(ResumeCall))
+      ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
+
+    // Put a 'ret void' after the call, and split any remaining instructions to
+    // an unreachable block.
+    BasicBlock *BB = ResumeCall->getParent();
+    BB->splitBasicBlock(ResumeCall->getNextNode());
+    Builder.SetInsertPoint(BB->getTerminator());
+    Builder.CreateRetVoid();
+    BB->getTerminator()->eraseFromParent();
+  }
+
   Builder.SetInsertPoint(&NewF->getEntryBlock().front());
   NewFramePtr = deriveNewFramePointer();
 
@@ -1186,130 +1228,6 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
   }
 }
 
-// Replace a sequence of branches leading to a ret, with a clone of a ret
-// instruction. Suspend instruction represented by a switch, track the PHI
-// values and select the correct case successor when possible.
-static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
-  // There is nothing to simplify.
-  if (isa<ReturnInst>(InitialInst))
-    return false;
-
-  DenseMap<Value *, Value *> ResolvedValues;
-  assert(InitialInst->getModule());
-  const DataLayout &DL = InitialInst->getModule()->getDataLayout();
-
-  auto TryResolveConstant = [&ResolvedValues](Value *V) {
-    auto It = ResolvedValues.find(V);
-    if (It != ResolvedValues.end())
-      V = It->second;
-    return dyn_cast<ConstantInt>(V);
-  };
-
-  Instruction *I = InitialInst;
-  while (true) {
-    if (isa<ReturnInst>(I)) {
-      assert(!cast<ReturnInst>(I)->getReturnValue());
-      ReplaceInstWithInst(InitialInst, I->clone());
-      return true;
-    }
-
-    if (auto *BR = dyn_cast<BranchInst>(I)) {
-      unsigned SuccIndex = 0;
-      if (BR->isConditional()) {
-        // Handle the case the condition of the conditional branch is constant.
-        // e.g.,
-        //
-        //     br i1 false, label %cleanup, label %CoroEnd
-        //
-        // It is possible during the transformation. We could continue the
-        // simplifying in this case.
-        ConstantInt *Cond = TryResolveConstant(BR->getCondition());
-        if (!Cond)
-          return false;
-
-        SuccIndex = Cond->isOne() ? 0 : 1;
-      }
-
-      BasicBlock *Succ = BR->getSuccessor(SuccIndex);
-      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
-      I = Succ->getFirstNonPHIOrDbgOrLifetime();
-      continue;
-    }
-
-    if (auto *Cmp = dyn_cast<CmpInst>(I)) {
-      // If the case number of suspended switch instruction is reduced to
-      // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
-      // Try to constant fold it.
-      ConstantInt *Cond0 = TryResolveConstant(Cmp->getOperand(0));
-      ConstantInt *Cond1 = TryResolveConstant(Cmp->getOperand(1));
-      if (Cond0 && Cond1) {
-        ConstantInt *Result =
-            dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
-                Cmp->getPredicate(), Cond0, Cond1, DL));
-        if (Result) {
-          ResolvedValues[Cmp] = Result;
-          I = I->getNextNode();
-          continue;
-        }
-      }
-    }
-
-    if (auto *SI = dyn_cast<SwitchInst>(I)) {
-      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
-      if (!Cond)
-        return false;
-
-      BasicBlock *Succ = SI->findCaseValue(Cond)->getCaseSuccessor();
-      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
-      I = Succ->getFirstNonPHIOrDbgOrLifetime();
-      continue;
-    }
-
-    if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() ||
-        wouldInstructionBeTriviallyDead(I)) {
-      // We can skip instructions without side effects. If their values are
-      // needed, we'll notice later, e.g. when hitting a conditional branch.
-      I = I->getNextNode();
-      continue;
-    }
-
-    break;
-  }
-
-  return false;
-}
-
-// Check whether CI obeys the rules of musttail attribute.
-static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
-  if (CI.isInlineAsm())
-    return false;
-
-  // Match prototypes and calling conventions of resume function.
-  FunctionType *CalleeTy = CI.getFunctionType();
-  if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
-    return false;
-
-  Type *CalleeParmTy = CalleeTy->getParamType(0);
-  if (!CalleeParmTy->isPointerTy() ||
-      (CalleeParmTy->getPointerAddressSpace() != 0))
-    return false;
-
-  if (CI.getCallingConv() != F.getCallingConv())
-    return false;
-
-  // CI should not has any ABI-impacting function attributes.
-  static const Attribute::AttrKind ABIAttrs[] = {
-      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
-      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
-      Attribute::SwiftSelf,    Attribute::SwiftError};
-  AttributeList Attrs = CI.getAttributes();
-  for (auto AK : ABIAttrs)
-    if (Attrs.hasParamAttr(0, AK))
-      return false;
-
-  return true;
-}
-
 // Coroutine has no suspend points. Remove heap allocation for the coroutine
 // frame if possible.
 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
@@ -1523,24 +1441,16 @@ struct SwitchCoroutineSplitter {
 
     createResumeEntryBlock(F, Shape);
     auto *ResumeClone =
-        createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume);
+        createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
     auto *DestroyClone =
-        createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind);
+        createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
     auto *CleanupClone =
-        createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup);
+        createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
 
     postSplitCleanup(*ResumeClone);
     postSplitCleanup(*DestroyClone);
     postSplitCleanup(*CleanupClone);
 
-    // Adding musttail call to support symmetric transfer.
-    // Skip targets which don't support tail call.
-    //
-    // FIXME: Could we support symmetric transfer effectively without musttail
-    // call?
-    if (TTI.supportsTailCalls())
-      addMustTailToCoroResumes(*ResumeClone, TTI);
-
     // Store addresses resume/destroy/cleanup functions in the coroutine frame.
     updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
 
@@ -1560,8 +1470,9 @@ struct SwitchCoroutineSplitter {
   // new entry block and replacing coro.suspend an appropriate value to force
   // resume or cleanup pass for every suspend point.
   static Function *createClone(Function &F, const Twine &Suffix,
-                               coro::Shape &Shape, CoroCloner::Kind FKind) {
-    CoroCloner Cloner(F, Suffix, Shape, FKind);
+                               coro::Shape &Shape, CoroCloner::Kind FKind,
+                               TargetTransformInfo &TTI) {
+    CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
     Cloner.create();
     return Cloner.getFunction();
   }
@@ -1662,34 +1573,6 @@ struct SwitchCoroutineSplitter {
     Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
   }
 
-  // Add musttail to any resume instructions that is immediately followed by a
-  // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
-  // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
-  // This transformation is done only in the resume part of the coroutine that
-  // has identical signature and calling convention as the coro.resume call.
-  static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
-    bool Changed = false;
-
-    // Collect potential resume instructions.
-    SmallVector<CallInst *, 4> Resumes;
-    for (auto &I : instructions(F))
-      if (auto *Call = dyn_cast<CallInst>(&I))
-        if (shouldBeMustTail(*Call, F))
-          Resumes.push_back(Call);
-
-    // Set musttail on those that are followed by a ret instruction.
-    for (CallInst *Call : Resumes)
-      // Skip targets which don't support tail call on the specific case.
-      if (TTI.supportsTailCallFor(Call) &&
-          simplifyTerminatorLeadingToRet(Call->getNextNode())) {
-        Call->setTailCallKind(CallInst::TCK_MustTail);
-        Changed = true;
-      }
-
-    if (Changed)
-      removeUnreachableBlocks(F);
-  }
-
   // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
   static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                               Function *DestroyFn, Function *CleanupFn) {
@@ -1894,12 +1777,13 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
     auto *Suspend = Shape.CoroSuspends[Idx];
     auto *Clone = Clones[Idx];
 
-    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
+    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
   }
 }
 
 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
-                                 SmallVectorImpl<Function *> &Clones) {
+                                 SmallVectorImpl<Function *> &Clones,
+                                 TargetTransformInfo &TTI) {
   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
   assert(Clones.empty());
 
@@ -2022,7 +1906,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
     auto Suspend = Shape.CoroSuspends[i];
     auto Clone = Clones[i];
 
-    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
+    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
   }
 }
 
@@ -2074,7 +1958,7 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
       break;
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
-      splitRetconCoroutine(F, Shape, Clones);
+      splitRetconCoroutine(F, Shape, Clones, TTI);
       break;
     }
   }
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index a1c78d6a44ef46..d891173156b2af 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -47,7 +47,7 @@ coro::LowererBase::LowererBase(Module &M)
 //
 //    call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index)
 
-Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
+CallInst *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
                                         Instruction *InsertPt) {
   auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index);
   auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr);
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
deleted file mode 100644
index a7321833d74843..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: call void @llvm.instrprof
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
deleted file mode 100644
index 6098dee9a58035..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %br0 = call i8 @switch_result()
-  switch i8 %br0, label %unreach [
-    i8 0, label %await.resume3
-    i8 1, label %await.resume1
-    i8 2, label %await.resume2
-  ]
-await.resume1:
-  %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
-  br label %final.suspend
-await.resume2:
-  %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
-  br label %final.suspend
-await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
-  br label %final.suspend
-final.suspend:
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %pre.exit
-    i8 1, label %exit
-  ]
-pre.exit:
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-unreach:
-  unreachable
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; CHECK-NEXT: ret void
-; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: musttail call fastcc void %[[addr4]](ptr null)
-; CHECK-NEXT: ret void
-
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare i8 @switch_result()
-declare ptr @g()
-declare ptr @h()
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
deleted file mode 100644
index f43b10ebf42e5a..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm64-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
deleted file mode 100644
index fc5bb9a1b20b3d..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
deleted file mode 100644
index 634d0106a2e6ae..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr)
-declare void @print()
-
-define void @f(i1 %cond) #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  br i1 %cond, label %then, label %else
-
-then:
-  call fastcc void @fakeresume1(ptr align 8 null)
-  br label %merge
-
-else:
-  br label %merge
-
-merge:
-  %v0 = phi i1 [0, %then], [1, %else]
-  br label %compare
-
-compare:
-  %cond.cmp = icmp eq i1 %v0, 0
-  br i1 %cond.cmp, label %ready, label %prepare
-
-prepare:
-  call void @print()
-  br label %ready
-
-ready:
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK-NOT:      }
-; CHECK:          call void @print()
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
deleted file mode 100644
index 2f9a14c9010719..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr)
-declare void @may_throw(ptr)
-declare void @print()
-
-define void @f(i1 %cond) #0 personality i32 3 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  call fastcc void @fakeresume1(ptr align 8 null)
-  invoke void @may_throw(ptr null)
-    to label %ready unwind label %lpad
-
-ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-lpad:
-  %lpval = landingpad { ptr, i32 }
-     cleanup
-
-  %need.resume = call i1 @llvm.coro.end(ptr null, i1 true, token none)
-  resume { ptr, i32 } %lpval
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK-NOT:          musttail call fastcc void @fakeresume1(
-; CHECK:     }
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
deleted file mode 100644
index 61b61a200e704d..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @fakeresume1(ptr)  {
-entry:
-  ret void;
-}
-
-define void @fakeresume2(ptr align 8)  {
-entry:
-  ret void;
-}
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume2(ptr align 8 null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @g(
-; CHECK-NOT: musttail call fastcc void @fakeresume1(ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
deleted file mode 100644
index 82176b8085e6c7..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  %cmp = icmp eq i8 %suspend, 0
-  br i1 %cmp, label %await.suspend, label %exit
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %br0 = call i8 @switch_result()
-  switch i8 %br0, label %unreach [
-    i8 0, label %await.resume3
-    i8 1, label %await.resume1
-    i8 2, label %await.resume2
-  ]
-await.resume1:
-  %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
-  br label %final.suspend
-await.resume2:
-  %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
-  br label %final.suspend
-await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
-  br label %final.suspend
-final.suspend:
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  %cmp2 = icmp eq i8 %suspend2, 0
-  br i1 %cmp2, label %pre.exit, label %exit
-pre.exit:
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-unreach:
-  unreachable
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; CHECK-NEXT: ret void
-; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: musttail call fastcc void %[[addr4]](ptr null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare i8 @switch_result()
-declare ptr @g()
-declare ptr @h()
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
deleted file mode 100644
index be70fc4b51f1db..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @fakeresume1(ptr)  {
-entry:
-  ret void;
-}
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(
-; CHECK-NEXT:     ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
deleted file mode 100644
index 3e5bddd8e13112..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i8
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:     ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
deleted file mode 100644
index 4359d5305d4d91..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-; It has a cleanup bb.
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %cleanup
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %exit, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %exit
-
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
deleted file mode 100644
index 2a14be0f921806..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
-; is that this contains dead instruction generated during the transformation,
-; which makes the optimization harder.
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:    ret void
-
-; It has a cleanup bb.
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %cleanup
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %exit, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %exit
-
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:    ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
index fbc4a2c006f84e..fd3b7bd815300c 100644
--- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
@@ -58,14 +58,13 @@ suspend.cond:
 ; CHECK-NEXT:       to label %[[STEP2_CONT:[^ ]+]] unwind label %[[PAD]]
 step2:
   %save2 = call token @llvm.coro.save(ptr null)
-  %resume.handle = invoke ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
+  invoke void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
     to label %step2.continue unwind label %pad
 
 ; CHECK:      [[STEP2_CONT]]:
 ; CHECK-NEXT:   %[[NEXT_RESUME:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[NEXT_HDL]], i8 0)
 ; CHECK-NEXT:   musttail call {{.*}} void %[[NEXT_RESUME]](ptr %[[NEXT_HDL]])
 step2.continue:
-  call void @llvm.coro.resume(ptr %resume.handle)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %ret [
     i8 0, label %step3
@@ -112,7 +111,7 @@ declare i1 @llvm.coro.alloc(token)
 declare ptr @llvm.coro.begin(token, ptr)
 declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
 declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
-declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
 declare i1 @llvm.coro.end(ptr, i1, token)
 
 declare ptr @__cxa_begin_catch(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
index 0f574c4acc26e7..8d019e6954628b 100644
--- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
@@ -49,8 +49,7 @@ suspend.cond:
 ; CHECK-NEXT:   musttail call {{.*}} void %[[CONT]](ptr %[[NEXT_HDL]])
 step2:
   %save2 = call token @llvm.coro.save(ptr null)
-  %resume.handle = call ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
-  call void @llvm.coro.resume(ptr %resume.handle)
+  call void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %ret [
     i8 0, label %step3
@@ -89,7 +88,7 @@ declare i1 @llvm.coro.alloc(token)
 declare ptr @llvm.coro.begin(token, ptr)
 declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
 declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
-declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
 declare i1 @llvm.coro.end(ptr, i1, token)
 
 declare noalias ptr @malloc(i32)
diff --git a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll b/llvm/test/Transforms/Coroutines/coro-preserve-final.ll
deleted file mode 100644
index 16eeb84e7915ae..00000000000000
--- a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-%"struct.std::__n4861::noop_coroutine_promise" = type { i8 }
-%struct.Promise = type { %"struct.std::__n4861::coroutine_handle" }
-%"struct.std::__n4861::coroutine_handle" = type { ptr }
-
-define dso_local ptr @_Z5Outerv() #1 {
-entry:
-  %__promise = alloca %struct.Promise, align 8
-  %0 = call token @llvm.coro.id(i32 16, ptr nonnull %__promise, ptr nonnull @_Z5Outerv, ptr null)
-  %1 = call i1 @llvm.coro.alloc(token %0)
-  br i1 %1, label %coro.alloc, label %init.suspend
-
-coro.alloc:                                       ; preds = %entry
-  %2 = tail call i64 @llvm.coro.size.i64()
-  %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef %2) #12
-  br label %init.suspend
-
-init.suspend:                                     ; preds = %entry, %coro.alloc
-  %3 = phi ptr [ null, %entry ], [ %call, %coro.alloc ]
-  %4 = call ptr @llvm.coro.begin(token %0, ptr %3) #13
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %__promise) #3
-  store ptr null, ptr %__promise, align 8
-  %5 = call token @llvm.coro.save(ptr null)
-  %6 = call i8 @llvm.coro.suspend(token %5, i1 false)
-  switch i8 %6, label %coro.ret [
-  i8 0, label %await.suspend
-  i8 1, label %cleanup62
-  ]
-
-await.suspend:                                    ; preds = %init.suspend
-  %7 = call token @llvm.coro.save(ptr null)
-  %8 = call ptr @llvm.coro.subfn.addr(ptr %4, i8 0)
-  call fastcc void %8(ptr %4) #3
-  %9 = call i8 @llvm.coro.suspend(token %7, i1 false)
-  switch i8 %9, label %coro.ret [
-  i8 0, label %await2.suspend
-  i8 1, label %cleanup62
-  ]
-
-await2.suspend:                                   ; preds = %await.suspend
-  %call27 = call ptr @_Z5Innerv() #3
-  %10 = call token @llvm.coro.save(ptr null)
-  %11 = getelementptr inbounds i8, ptr %__promise, i64 -16
-  store ptr %11, ptr %call27, align 8
-  %12 = getelementptr inbounds i8, ptr %call27, i64 -16
-  %13 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 0)
-  call fastcc void %13(ptr nonnull %12) #3
-  %14 = call i8 @llvm.coro.suspend(token %10, i1 false)
-  switch i8 %14, label %coro.ret [
-  i8 0, label %final.suspend
-  i8 1, label %cleanup62
-  ]
-
-final.suspend:                                    ; preds = %await2.suspend
-  %15 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 1)
-  call fastcc void %15(ptr nonnull %12) #3
-  %16 = call token @llvm.coro.save(ptr null)
-  %retval.sroa.0.0.copyload.i = load ptr, ptr %__promise, align 8
-  %17 = call ptr @llvm.coro.subfn.addr(ptr %retval.sroa.0.0.copyload.i, i8 0)
-  call fastcc void %17(ptr %retval.sroa.0.0.copyload.i) #3
-  %18 = call i8 @llvm.coro.suspend(token %16, i1 true) #13
-  switch i8 %18, label %coro.ret [
-  i8 0, label %final.ready
-  i8 1, label %cleanup62
-  ]
-
-final.ready:                                      ; preds = %final.suspend
-  call void @_Z5_exiti(i32 noundef 1) #14
-  unreachable
-
-cleanup62:                                        ; preds = %await2.suspend, %await.suspend, %init.suspend, %final.suspend
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %__promise) #3
-  %19 = call ptr @llvm.coro.free(token %0, ptr %4)
-  %.not = icmp eq ptr %19, null
-  br i1 %.not, label %coro.ret, label %coro.free
-
-coro.free:                                        ; preds = %cleanup62
-  call void @_ZdlPv(ptr noundef nonnull %19) #3
-  br label %coro.ret
-
-coro.ret:                                         ; preds = %coro.free, %cleanup62, %final.suspend, %await2.suspend, %await.suspend, %init.suspend
-  %20 = call i1 @llvm.coro.end(ptr null, i1 false, token none) #13
-  ret ptr %__promise
-}
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #2
-declare i1 @llvm.coro.alloc(token) #3
-declare dso_local noundef nonnull ptr @_Znwm(i64 noundef) local_unnamed_addr #4
-declare i64 @llvm.coro.size.i64() #5
-declare ptr @llvm.coro.begin(token, ptr writeonly) #3
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6
-declare token @llvm.coro.save(ptr) #7
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6
-declare i8 @llvm.coro.suspend(token, i1) #3
-declare dso_local ptr @_Z5Innerv() local_unnamed_addr #8
-declare dso_local void @_ZdlPv(ptr noundef) local_unnamed_addr #9
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #2
-declare i1 @llvm.coro.end(ptr, i1, token) #3
-declare dso_local void @_Z5_exiti(i32 noundef) local_unnamed_addr #10
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #11
-
-attributes #0 = { mustprogress nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { nounwind presplitcoroutine uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { argmemonly nofree nounwind readonly }
-attributes #3 = { nounwind }
-attributes #4 = { nobuiltin allocsize(0) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #5 = { nofree nosync nounwind readnone }
-attributes #6 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
-attributes #7 = { nomerge nounwind }
-attributes #8 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #9 = { nobuiltin nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #10 = { noreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #11 = { argmemonly nounwind readonly }
-attributes #12 = { nounwind allocsize(0) }
-attributes #13 = { noduplicate }
-attributes #14 = { noreturn nounwind }
-
-; CHECK: define{{.*}}@_Z5Outerv.resume(
-; CHECK: entry.resume:
-; CHECK: switch i2 %index
-; CHECK-NEXT:    i2 0, label %await2.suspend
-; CHECK-NEXT:    i2 1, label %final.suspend
-;
-; CHECK: await2.suspend:
-; CHECK: musttail call
-; CHECK-NEXT: ret void
-;
-; CHECK: final.suspend:
-; CHECK: musttail call
-; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
index ddd293eed2409e..e2ed205f2c2f4f 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
@@ -90,10 +90,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 {
   %25 = getelementptr inbounds { ptr, ptr }, ptr %5, i64 0, i32 1
   store ptr %24, ptr %25, align 8
   %26 = call token @llvm.coro.save(ptr null)
-  %27 = call ptr @await_transform_await_suspend(ptr noundef nonnull align 8 dereferenceable(16) %5, ptr %14)
-  %28 = call ptr @llvm.coro.subfn.addr(ptr %27, i8 0)
-  %29 = ptrtoint ptr %28 to i64
-  call fastcc void %28(ptr %27) #9
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend)
   %30 = call i8 @llvm.coro.suspend(token %26, i1 false)
   switch i8 %30, label %60 [
     i8 0, label %31
@@ -123,9 +120,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 {
   br i1 %42, label %43, label %46
 
 43:                                               ; preds = %36
-  %44 = call ptr @llvm.coro.subfn.addr(ptr nonnull %14, i8 1)
-  %45 = ptrtoint ptr %44 to i64
-  call fastcc void %44(ptr nonnull %14) #9
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend)
   br label %47
 
 46:                                               ; preds = %36
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 825e44471db27a..49589459c92448 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,7 +1,6 @@
-; Tests that coro-split will convert coro.resume followed by a suspend to a
-; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; Tests that coro-split will convert coro.await.suspend.handle to a tail call.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -20,8 +19,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -40,10 +38,8 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
-; PGO: call void @llvm.instrprof
-; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -57,6 +53,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index d0d11fc4495e48..e6da28cc612ddd 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -28,17 +28,14 @@ await.suspend:
   ]
 await.resume1:
   %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function)
   br label %final.suspend
 await.resume2:
   %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function)
   br label %final.suspend
 await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   br label %final.suspend
 final.suspend:
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
@@ -63,18 +60,18 @@ unreach:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr3]]
 ; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
-; PGO: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK: call ptr @await_suspend_function
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr4]]
 ; CHECK-NEXT: ret void
 
 
@@ -93,6 +90,7 @@ declare ptr @malloc(i64)
 declare i8 @switch_result()
 declare ptr @g()
 declare ptr @h()
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index cdd58b2a084fcd..a96e2472e74ed9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,9 +1,10 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
-; Wasm64 with tail-call support.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; Wasm64 or Wasm32 with tail-call support.
+; RUN: opt -mtriple=wasm64-unknown-unknown < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt -mtriple=wasm64-unknown-unknown < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
-target triple = "wasm64-unknown-unknown"
+; RUN: opt -mtriple=wasm32-unknown-unknown < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt -mtriple=wasm32-unknown-unknown < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @f() #0 {
 entry:
@@ -22,8 +23,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -48,6 +48,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
deleted file mode 100644
index da5d868280e967..00000000000000
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that we would convert coro.resume to a musttail call if the target is
-; Wasm32 with tail-call support.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 2f27f79480ab1b..e1f794c52dbae8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -8,11 +8,6 @@ entry:
   ret void;
 }
 
-define void @fakeresume2(ptr align 8)  {
-entry:
-  ret void;
-}
-
 define void @g() #0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
@@ -29,7 +24,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume2(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -47,7 +42,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null)
+; CHECK: call ptr @await_suspend_function
+; CHECK-NEXT: call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -61,6 +58,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index 4778e3dcaf9957..068c6a3619869f 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -26,17 +26,14 @@ await.suspend:
   ]
 await.resume1:
   %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function)
   br label %final.suspend
 await.resume2:
   %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function)
   br label %final.suspend
 await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   br label %final.suspend
 final.suspend:
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
@@ -59,22 +56,21 @@ unreach:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr3]]
 ; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
-; PGO: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK: call ptr @await_suspend_function
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr4]]
 ; CHECK-NEXT: ret void
 
 
-
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
 declare i1 @llvm.coro.alloc(token) #2
 declare i64 @llvm.coro.size.i64() #3
@@ -89,6 +85,7 @@ declare ptr @malloc(i64)
 declare i8 @switch_result()
 declare ptr @g()
 declare ptr @h()
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 00ee422ce5863d..931473d893ecad 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -24,7 +24,7 @@ entry:
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
 
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
   %switch = icmp ult i8 %suspend, 2
   br i1 %switch, label %cleanup, label %coro.end
@@ -44,7 +44,7 @@ coro.end:
 }
 
 ; CHECK-LABEL: @f.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(
+; CHECK:          musttail call fastcc void
 ; CHECK-NEXT:     ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -59,6 +59,7 @@ declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
 declare void @delete(ptr nonnull) #2
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index 9afc79abbe88cd..69cff303995646 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -22,7 +22,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -39,7 +39,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:          musttail call fastcc void
 ; CHECK-NEXT:     ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -56,6 +56,7 @@ declare ptr @malloc(i64)
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index d9dba92ec4eb7e..971e8e47fc9310 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -25,7 +25,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -42,7 +42,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:      musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 ; It has a cleanup bb.
@@ -63,7 +63,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -90,7 +90,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:      musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -108,6 +108,7 @@ declare void @delete(ptr nonnull) #2
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index d0d5005587bda6..b62480a50737c3 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -6,8 +6,6 @@
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
-declare void @fakeresume1(ptr align 8)
-
 define i64 @g() #0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
@@ -25,7 +23,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
 
   ; These (non-trivially) dead instructions are in the way.
@@ -48,7 +46,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:         %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null)
+; CHECK:         %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0)
+; CHECK:         musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]])
 ; CHECK-NEXT:    ret void
 
 ; It has a cleanup bb.
@@ -69,7 +69,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -96,7 +96,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:         %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null)
+; CHECK:         %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0)
+; CHECK:         musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]])
 ; CHECK-NEXT:    ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -114,6 +116,7 @@ declare void @delete(ptr nonnull) #2
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }

>From 6d23b16e2d8aeebffcaf13d23ffd0acdb7df1a46 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 23 Apr 2024 14:50:42 +0200
Subject: [PATCH 2/4] format

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 5 +++--
 llvm/lib/Transforms/Coroutines/Coroutines.cpp | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 01eb75617e39fe..ff037518cb302e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -212,7 +212,8 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
 
   if (CB->getCalledFunction()->getIntrinsicID() ==
       Intrinsic::coro_await_suspend_handle) {
-    // Follow the await_suspend by a lowered resume call to the returned coroutine.
+    // Follow the await_suspend by a lowered resume call to the returned
+    // coroutine.
     if (auto *Invoke = dyn_cast<InvokeInst>(CB))
       Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
 
@@ -220,7 +221,7 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
     auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
                                         &*Builder.GetInsertPoint());
 
-    LLVMContext& Ctx = Builder.getContext();
+    LLVMContext &Ctx = Builder.getContext();
     FunctionType *ResumeTy = FunctionType::get(
         Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
     auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index d891173156b2af..1a92bc1636257b 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -48,7 +48,7 @@ coro::LowererBase::LowererBase(Module &M)
 //    call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index)
 
 CallInst *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
-                                        Instruction *InsertPt) {
+                                           Instruction *InsertPt) {
   auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index);
   auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr);
 

>From 862b5d1226d3c4d298bd25d95d15b16046910e6d Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 24 Apr 2024 14:10:52 +0200
Subject: [PATCH 3/4] reinstate fixme

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ff037518cb302e..72c324c82994bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1087,8 +1087,11 @@ void CoroCloner::create() {
   for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
     ResumeCall = cast<CallInst>(VMap[ResumeCall]);
     ResumeCall->setCallingConv(NewF->getCallingConv());
-    if (TTI.supportsTailCallFor(ResumeCall))
+    if (TTI.supportsTailCallFor(ResumeCall)) {
+      // FIXME: Could we support symmetric transfer effectively without
+      // musttail?
       ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
+    }
 
     // Put a 'ret void' after the call, and split any remaining instructions to
     // an unreachable block.

>From 7192d93fe8172093833ce2887bd13ec0985f617c Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 24 Apr 2024 14:35:30 +0200
Subject: [PATCH 4/4] llvm.coro.await.suspend.handle calls must not be nounwind

---
 clang/lib/CodeGen/CGCoroutine.cpp           | 6 +++++-
 clang/test/CodeGenCoroutines/coro-await.cpp | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index e976734898b9b8..fb2994fd35e5a5 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -278,7 +278,11 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
 
   llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);
 
-  const auto AwaitSuspendCanThrow = StmtCanThrow(S.getSuspendExpr());
+  // SuspendHandle might throw since it also resumes the returned handle.
+  const bool AwaitSuspendCanThrow =
+      SuspendReturnType ==
+          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
+      StmtCanThrow(S.getSuspendExpr());
 
   llvm::CallBase *SuspendRet = nullptr;
   // FIXME: add call attributes?
diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 7caaa6351844b2..b92777dfa28a6c 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -370,7 +370,8 @@ extern "C" void TestTailcall() {
   // ---------------------------
   // Call coro.await.suspend
   // ---------------------------
-  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
+  // Note: The call must not be nounwind since the resumed function could throw.
+  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await){{$}}
   // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
   // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
   // CHECK-NEXT:   i8 0, label %[[READY_BB]]



More information about the cfe-commits mailing list