[clang] [llvm] [coro] Lower `llvm.coro.await.suspend.handle` to resume with tail call (PR #89751)

via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 29 07:37:40 PDT 2024


https://github.com/zmodem updated https://github.com/llvm/llvm-project/pull/89751

>From 33b07efe6d68cb4d17e96349b552ef5e5901d8c6 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 26 Mar 2024 15:04:35 +0100
Subject: [PATCH 1/6] stuff

---
 clang/lib/CodeGen/CGCoroutine.cpp             |   5 +-
 clang/test/CodeGenCoroutines/coro-await.cpp   |   3 +-
 .../coro-symmetric-transfer-01.cpp            |   4 +-
 .../coro-symmetric-transfer-02.cpp            |   6 +-
 llvm/docs/Coroutines.rst                      |  19 +-
 llvm/docs/LangRef.rst                         |   2 +-
 llvm/include/llvm/IR/Intrinsics.td            |   2 +-
 llvm/lib/Transforms/Coroutines/CoroInstr.h    |   4 +-
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   3 +-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 234 +++++-------------
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |   2 +-
 .../Coro/coro-split-musttail.ll               |  63 -----
 .../Coro/coro-split-musttail1.ll              |  97 --------
 .../Coro/coro-split-musttail10.ll             |  55 ----
 .../Coro/coro-split-musttail11.ll             |  55 ----
 .../Coro/coro-split-musttail12.ll             |  85 -------
 .../Coro/coro-split-musttail13.ll             |  76 ------
 .../Coro/coro-split-musttail2.ll              |  68 -----
 .../Coro/coro-split-musttail3.ll              |  91 -------
 .../Coro/coro-split-musttail4.ll              |  66 -----
 .../Coro/coro-split-musttail5.ll              |  63 -----
 .../Coro/coro-split-musttail6.ll              | 112 ---------
 .../Coro/coro-split-musttail7.ll              | 115 ---------
 .../coro-await-suspend-lower-invoke.ll        |   5 +-
 .../Coroutines/coro-await-suspend-lower.ll    |   5 +-
 .../Coroutines/coro-preserve-final.ll         | 131 ----------
 ...-split-musttail-chain-pgo-counter-promo.ll |   9 +-
 .../Coroutines/coro-split-musttail.ll         |  17 +-
 .../Coroutines/coro-split-musttail1.ll        |  32 ++-
 .../Coroutines/coro-split-musttail10.ll       |  13 +-
 .../Coroutines/coro-split-musttail11.ll       |  55 ----
 .../Coroutines/coro-split-musttail2.ll        |  12 +-
 .../Coroutines/coro-split-musttail3.ll        |  33 ++-
 .../Coroutines/coro-split-musttail4.ll        |   5 +-
 .../Coroutines/coro-split-musttail5.ll        |   5 +-
 .../Coroutines/coro-split-musttail6.ll        |   9 +-
 .../Coroutines/coro-split-musttail7.ll        |  15 +-
 37 files changed, 157 insertions(+), 1419 deletions(-)
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
 delete mode 100644 llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
 delete mode 100644 llvm/test/Transforms/Coroutines/coro-preserve-final.ll
 delete mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail11.ll

diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 93ca711f716fce..e976734898b9b8 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -307,10 +307,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
     break;
   }
   case CoroutineSuspendExpr::SuspendReturnType::SuspendHandle: {
-    assert(SuspendRet->getType()->isPointerTy());
-
-    auto ResumeIntrinsic = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_resume);
-    Builder.CreateCall(ResumeIntrinsic, SuspendRet);
+    assert(SuspendRet->getType()->isVoidTy());
     break;
   }
   }
diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 75851d8805bb6e..7caaa6351844b2 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -370,8 +370,7 @@ extern "C" void TestTailcall() {
   // ---------------------------
   // Call coro.await.suspend
   // ---------------------------
-  // CHECK-NEXT: %[[RESUMED:.+]] = call ptr @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
-  // CHECK-NEXT: call void @llvm.coro.resume(ptr %[[RESUMED]])
+  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
   // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
   // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
   // CHECK-NEXT:   i8 0, label %[[READY_BB]]
diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
index da30e12c63cffb..0ae672de391c47 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
@@ -48,7 +48,7 @@ detached_task foo() {
 }
 
 // check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points.
+// XXX: not sure this makes sense anymore?
 // CHECK-LABEL: final.suspend:
 // CHECK:         %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK:         %[[HDL_TRANSFER:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK:         call void @llvm.coro.resume(ptr %[[HDL_TRANSFER]])
+// CHECK:         call void @llvm.coro.await.suspend.handle
diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
index ca6cf74115a3b1..f36f89926505f3 100644
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
+++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp
@@ -89,8 +89,7 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE1_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    %[[HANDLE1_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE1_PTR]])
+// CHECK-NEXT:    call void @llvm.coro.await.suspend.handle
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE1_AWAIT_READY]]
@@ -104,8 +103,7 @@ Task bar() {
 // CHECK:         br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]]
 // CHECK:       [[CASE2_AWAIT_SUSPEND]]:
 // CHECK-NEXT:    %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK-NEXT:    %[[HANDLE2_PTR:.+]] = call ptr @llvm.coro.await.suspend.handle
-// CHECK-NEXT:    call void @llvm.coro.resume(ptr %[[HANDLE2_PTR]])
+// CHECK-NEXT:    call void @llvm.coro.await.suspend.handle
 // CHECK-NEXT:    %{{.+}} = call i8 @llvm.coro.suspend
 // CHECK-NEXT:    switch i8 %{{.+}}, label %coro.ret [
 // CHECK-NEXT:      i8 0, label %[[CASE2_AWAIT_READY]]
diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index 83369d93c309a7..36092325e536fb 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -1922,7 +1922,7 @@ Example:
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 ::
 
-  declare ptr @llvm.coro.await.suspend.handle(
+  declare void @llvm.coro.await.suspend.handle(
                 ptr <awaiter>,
                 ptr <handle>,
                 ptr <await_suspend_function>)
@@ -1967,7 +1967,9 @@ The intrinsic must be used between corresponding `coro.save`_ and
 `await_suspend_function` call during `CoroSplit`_ pass.
 
 `await_suspend_function` must return a pointer to a valid
-coroutine frame, which is immediately resumed
+coroutine frame. The intrinsic will be lowered to a tail call resuming the
+returned coroutine frame. It will be marked `musttail` on targets that support
+that. Instructions following the intrinsic will become unreachable.
 
 Example:
 """"""""
@@ -1977,11 +1979,10 @@ Example:
   ; before lowering
   await.suspend:
     %save = call token @llvm.coro.save(ptr %hdl)
-    %next = call ptr @llvm.coro.await.suspend.handle(
-                ptr %awaiter,
-                ptr %hdl,
-                ptr @await_suspend_function)
-    call void @llvm.coro.resume(%next)
+    call void @llvm.coro.await.suspend.handle(
+        ptr %awaiter,
+        ptr %hdl,
+        ptr @await_suspend_function)
     %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
     ...
 
@@ -1992,8 +1993,8 @@ Example:
     %next = call ptr @await_suspend_function(
                 ptr %awaiter,
                 ptr %hdl)
-    call void @llvm.coro.resume(%next)
-    %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    musttail call void @llvm.coro.resume(%next)
+    ret void
     ...
 
   ; wrapper function example
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9592929d79feb4..0e87a8e2ace0e2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12517,7 +12517,7 @@ This instruction requires several arguments:
       ``llvm::GuaranteedTailCallOpt`` is ``true``, or the calling convention
       is ``tailcc``
    -  `Platform-specific constraints are
-      met. <CodeGenerator.html#tailcallopt>`_
+      met. <CodeGenerator.html#tail-call-optimization>`_
 
 #. The optional ``notail`` marker indicates that the optimizers should not add
    ``tail`` or ``musttail`` markers to the call. It is used to prevent tail
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 1d20f7e1b19854..2bbdd0e4627c62 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1711,7 +1711,7 @@ def int_coro_await_suspend_bool : Intrinsic<[llvm_i1_ty],
                                             [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
                                             [Throws]>;
 
-def int_coro_await_suspend_handle : Intrinsic<[llvm_ptr_ty],
+def int_coro_await_suspend_handle : Intrinsic<[],
                                               [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
                                               [Throws]>;
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 79e745bb162cdb..a31703fe01304c 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -78,10 +78,10 @@ class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
   }
 };
 
-/// This represents the llvm.coro.await.suspend instruction.
+/// This represents the llvm.coro.await.suspend.{void,bool,handle} instructions.
 // FIXME: add callback metadata
 // FIXME: make a proper IntrinisicInst. Currently this is not possible,
-// because llvm.coro.await.suspend can be invoked.
+// because llvm.coro.await.suspend.* can be invoked.
 class LLVM_LIBRARY_VISIBILITY CoroAwaitSuspendInst : public CallBase {
   enum { AwaiterArg, FrameArg, WrapperArg };
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 84fd88806154e3..5716fd0ea4ab96 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -47,7 +47,7 @@ struct LowererBase {
   ConstantPointerNull *const NullPtr;
 
   LowererBase(Module &M);
-  Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
+  CallInst *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
 };
 
 enum class ABI {
@@ -85,6 +85,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
   SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
   SmallVector<CallInst*, 2> SwiftErrorOps;
   SmallVector<CoroAwaitSuspendInst *, 4> CoroAwaitSuspends;
+  SmallVector<CallInst *, 2> SymmetricTransfers;
 
   // Field indexes for special fields in the switch lowering.
   struct SwitchFieldIndex {
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 3a43b1edcaba37..01eb75617e39fe 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -113,21 +113,24 @@ class CoroCloner {
   /// ABIs.
   AnyCoroSuspendInst *ActiveSuspend = nullptr;
 
+  TargetTransformInfo &TTI;
+
 public:
   /// Create a cloner for a switch lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             Kind FKind)
+             Kind FKind, TargetTransformInfo &TTI)
       : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), FKind(FKind),
-        Builder(OrigF.getContext()) {
+        Builder(OrigF.getContext()), TTI(TTI) {
     assert(Shape.ABI == coro::ABI::Switch);
   }
 
   /// Create a cloner for a continuation lowering.
   CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape,
-             Function *NewF, AnyCoroSuspendInst *ActiveSuspend)
+             Function *NewF, AnyCoroSuspendInst *ActiveSuspend,
+             TargetTransformInfo &TTI)
       : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape),
         FKind(Shape.ABI == coro::ABI::Async ? Kind::Async : Kind::Continuation),
-        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend) {
+        Builder(OrigF.getContext()), ActiveSuspend(ActiveSuspend), TTI(TTI) {
     assert(Shape.ABI == coro::ABI::Retcon ||
            Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async);
     assert(NewF && "need existing function for continuation");
@@ -171,7 +174,8 @@ class CoroCloner {
 // Lower the intrinisc in CoroEarly phase if coroutine frame doesn't escape
 // and it is known that other transformations, for example, sanitizers
 // won't lead to incorrect code.
-static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
+static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
+                              coro::Shape &Shape) {
   auto Wrapper = CB->getWrapperFunction();
   auto Awaiter = CB->getAwaiter();
   auto FramePtr = CB->getFrame();
@@ -206,6 +210,28 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
     llvm_unreachable("Unexpected coro_await_suspend invocation method");
   }
 
+  if (CB->getCalledFunction()->getIntrinsicID() ==
+      Intrinsic::coro_await_suspend_handle) {
+    // Follow the await_suspend by a lowered resume call to the returned coroutine.
+    if (auto *Invoke = dyn_cast<InvokeInst>(CB))
+      Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
+
+    coro::LowererBase LB(*Wrapper->getParent());
+    auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
+                                        &*Builder.GetInsertPoint());
+
+    LLVMContext& Ctx = Builder.getContext();
+    FunctionType *ResumeTy = FunctionType::get(
+        Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
+    auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
+
+    // We can't insert the 'ret' instruction and adjust the cc until the
+    // function has been split, so remember this for later.
+    Shape.SymmetricTransfers.push_back(ResumeCall);
+
+    NewCall = ResumeCall;
+  }
+
   CB->replaceAllUsesWith(NewCall);
   CB->eraseFromParent();
 }
@@ -213,7 +239,7 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB) {
 static void lowerAwaitSuspends(Function &F, coro::Shape &Shape) {
   IRBuilder<> Builder(F.getContext());
   for (auto *AWS : Shape.CoroAwaitSuspends)
-    lowerAwaitSuspend(Builder, AWS);
+    lowerAwaitSuspend(Builder, AWS, Shape);
 }
 
 static void maybeFreeRetconStorage(IRBuilder<> &Builder,
@@ -1056,6 +1082,22 @@ void CoroCloner::create() {
   // Set up the new entry block.
   replaceEntryBlock();
 
+  // Turn symmetric transfers into musttail calls.
+  for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
+    ResumeCall = cast<CallInst>(VMap[ResumeCall]);
+    ResumeCall->setCallingConv(NewF->getCallingConv());
+    if (TTI.supportsTailCallFor(ResumeCall))
+      ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
+
+    // Put a 'ret void' after the call, and split any remaining instructions to
+    // an unreachable block.
+    BasicBlock *BB = ResumeCall->getParent();
+    BB->splitBasicBlock(ResumeCall->getNextNode());
+    Builder.SetInsertPoint(BB->getTerminator());
+    Builder.CreateRetVoid();
+    BB->getTerminator()->eraseFromParent();
+  }
+
   Builder.SetInsertPoint(&NewF->getEntryBlock().front());
   NewFramePtr = deriveNewFramePointer();
 
@@ -1186,130 +1228,6 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
   }
 }
 
-// Replace a sequence of branches leading to a ret, with a clone of a ret
-// instruction. Suspend instruction represented by a switch, track the PHI
-// values and select the correct case successor when possible.
-static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
-  // There is nothing to simplify.
-  if (isa<ReturnInst>(InitialInst))
-    return false;
-
-  DenseMap<Value *, Value *> ResolvedValues;
-  assert(InitialInst->getModule());
-  const DataLayout &DL = InitialInst->getModule()->getDataLayout();
-
-  auto TryResolveConstant = [&ResolvedValues](Value *V) {
-    auto It = ResolvedValues.find(V);
-    if (It != ResolvedValues.end())
-      V = It->second;
-    return dyn_cast<ConstantInt>(V);
-  };
-
-  Instruction *I = InitialInst;
-  while (true) {
-    if (isa<ReturnInst>(I)) {
-      assert(!cast<ReturnInst>(I)->getReturnValue());
-      ReplaceInstWithInst(InitialInst, I->clone());
-      return true;
-    }
-
-    if (auto *BR = dyn_cast<BranchInst>(I)) {
-      unsigned SuccIndex = 0;
-      if (BR->isConditional()) {
-        // Handle the case the condition of the conditional branch is constant.
-        // e.g.,
-        //
-        //     br i1 false, label %cleanup, label %CoroEnd
-        //
-        // It is possible during the transformation. We could continue the
-        // simplifying in this case.
-        ConstantInt *Cond = TryResolveConstant(BR->getCondition());
-        if (!Cond)
-          return false;
-
-        SuccIndex = Cond->isOne() ? 0 : 1;
-      }
-
-      BasicBlock *Succ = BR->getSuccessor(SuccIndex);
-      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
-      I = Succ->getFirstNonPHIOrDbgOrLifetime();
-      continue;
-    }
-
-    if (auto *Cmp = dyn_cast<CmpInst>(I)) {
-      // If the case number of suspended switch instruction is reduced to
-      // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
-      // Try to constant fold it.
-      ConstantInt *Cond0 = TryResolveConstant(Cmp->getOperand(0));
-      ConstantInt *Cond1 = TryResolveConstant(Cmp->getOperand(1));
-      if (Cond0 && Cond1) {
-        ConstantInt *Result =
-            dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
-                Cmp->getPredicate(), Cond0, Cond1, DL));
-        if (Result) {
-          ResolvedValues[Cmp] = Result;
-          I = I->getNextNode();
-          continue;
-        }
-      }
-    }
-
-    if (auto *SI = dyn_cast<SwitchInst>(I)) {
-      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
-      if (!Cond)
-        return false;
-
-      BasicBlock *Succ = SI->findCaseValue(Cond)->getCaseSuccessor();
-      scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
-      I = Succ->getFirstNonPHIOrDbgOrLifetime();
-      continue;
-    }
-
-    if (I->isDebugOrPseudoInst() || I->isLifetimeStartOrEnd() ||
-        wouldInstructionBeTriviallyDead(I)) {
-      // We can skip instructions without side effects. If their values are
-      // needed, we'll notice later, e.g. when hitting a conditional branch.
-      I = I->getNextNode();
-      continue;
-    }
-
-    break;
-  }
-
-  return false;
-}
-
-// Check whether CI obeys the rules of musttail attribute.
-static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
-  if (CI.isInlineAsm())
-    return false;
-
-  // Match prototypes and calling conventions of resume function.
-  FunctionType *CalleeTy = CI.getFunctionType();
-  if (!CalleeTy->getReturnType()->isVoidTy() || (CalleeTy->getNumParams() != 1))
-    return false;
-
-  Type *CalleeParmTy = CalleeTy->getParamType(0);
-  if (!CalleeParmTy->isPointerTy() ||
-      (CalleeParmTy->getPointerAddressSpace() != 0))
-    return false;
-
-  if (CI.getCallingConv() != F.getCallingConv())
-    return false;
-
-  // CI should not has any ABI-impacting function attributes.
-  static const Attribute::AttrKind ABIAttrs[] = {
-      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
-      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
-      Attribute::SwiftSelf,    Attribute::SwiftError};
-  AttributeList Attrs = CI.getAttributes();
-  for (auto AK : ABIAttrs)
-    if (Attrs.hasParamAttr(0, AK))
-      return false;
-
-  return true;
-}
-
 // Coroutine has no suspend points. Remove heap allocation for the coroutine
 // frame if possible.
 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
@@ -1523,24 +1441,16 @@ struct SwitchCoroutineSplitter {
 
     createResumeEntryBlock(F, Shape);
     auto *ResumeClone =
-        createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume);
+        createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI);
     auto *DestroyClone =
-        createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind);
+        createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI);
     auto *CleanupClone =
-        createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup);
+        createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI);
 
     postSplitCleanup(*ResumeClone);
     postSplitCleanup(*DestroyClone);
     postSplitCleanup(*CleanupClone);
 
-    // Adding musttail call to support symmetric transfer.
-    // Skip targets which don't support tail call.
-    //
-    // FIXME: Could we support symmetric transfer effectively without musttail
-    // call?
-    if (TTI.supportsTailCalls())
-      addMustTailToCoroResumes(*ResumeClone, TTI);
-
     // Store addresses resume/destroy/cleanup functions in the coroutine frame.
     updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
 
@@ -1560,8 +1470,9 @@ struct SwitchCoroutineSplitter {
   // new entry block and replacing coro.suspend an appropriate value to force
   // resume or cleanup pass for every suspend point.
   static Function *createClone(Function &F, const Twine &Suffix,
-                               coro::Shape &Shape, CoroCloner::Kind FKind) {
-    CoroCloner Cloner(F, Suffix, Shape, FKind);
+                               coro::Shape &Shape, CoroCloner::Kind FKind,
+                               TargetTransformInfo &TTI) {
+    CoroCloner Cloner(F, Suffix, Shape, FKind, TTI);
     Cloner.create();
     return Cloner.getFunction();
   }
@@ -1662,34 +1573,6 @@ struct SwitchCoroutineSplitter {
     Shape.SwitchLowering.ResumeEntryBlock = NewEntry;
   }
 
-  // Add musttail to any resume instructions that is immediately followed by a
-  // suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
-  // for symmetrical coroutine control transfer (C++ Coroutines TS extension).
-  // This transformation is done only in the resume part of the coroutine that
-  // has identical signature and calling convention as the coro.resume call.
-  static void addMustTailToCoroResumes(Function &F, TargetTransformInfo &TTI) {
-    bool Changed = false;
-
-    // Collect potential resume instructions.
-    SmallVector<CallInst *, 4> Resumes;
-    for (auto &I : instructions(F))
-      if (auto *Call = dyn_cast<CallInst>(&I))
-        if (shouldBeMustTail(*Call, F))
-          Resumes.push_back(Call);
-
-    // Set musttail on those that are followed by a ret instruction.
-    for (CallInst *Call : Resumes)
-      // Skip targets which don't support tail call on the specific case.
-      if (TTI.supportsTailCallFor(Call) &&
-          simplifyTerminatorLeadingToRet(Call->getNextNode())) {
-        Call->setTailCallKind(CallInst::TCK_MustTail);
-        Changed = true;
-      }
-
-    if (Changed)
-      removeUnreachableBlocks(F);
-  }
-
   // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
   static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
                               Function *DestroyFn, Function *CleanupFn) {
@@ -1894,12 +1777,13 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
     auto *Suspend = Shape.CoroSuspends[Idx];
     auto *Clone = Clones[Idx];
 
-    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend).create();
+    CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create();
   }
 }
 
 static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
-                                 SmallVectorImpl<Function *> &Clones) {
+                                 SmallVectorImpl<Function *> &Clones,
+                                 TargetTransformInfo &TTI) {
   assert(Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce);
   assert(Clones.empty());
 
@@ -2022,7 +1906,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
     auto Suspend = Shape.CoroSuspends[i];
     auto Clone = Clones[i];
 
-    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend).create();
+    CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create();
   }
 }
 
@@ -2074,7 +1958,7 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
       break;
     case coro::ABI::Retcon:
     case coro::ABI::RetconOnce:
-      splitRetconCoroutine(F, Shape, Clones);
+      splitRetconCoroutine(F, Shape, Clones, TTI);
       break;
     }
   }
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index a1c78d6a44ef46..d891173156b2af 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -47,7 +47,7 @@ coro::LowererBase::LowererBase(Module &M)
 //
 //    call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index)
 
-Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
+CallInst *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
                                         Instruction *InsertPt) {
   auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index);
   auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr);
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
deleted file mode 100644
index a7321833d74843..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: call void @llvm.instrprof
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
deleted file mode 100644
index 6098dee9a58035..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %br0 = call i8 @switch_result()
-  switch i8 %br0, label %unreach [
-    i8 0, label %await.resume3
-    i8 1, label %await.resume1
-    i8 2, label %await.resume2
-  ]
-await.resume1:
-  %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
-  br label %final.suspend
-await.resume2:
-  %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
-  br label %final.suspend
-await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
-  br label %final.suspend
-final.suspend:
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %pre.exit
-    i8 1, label %exit
-  ]
-pre.exit:
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-unreach:
-  unreachable
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; CHECK-NEXT: ret void
-; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: musttail call fastcc void %[[addr4]](ptr null)
-; CHECK-NEXT: ret void
-
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare i8 @switch_result()
-declare ptr @g()
-declare ptr @h()
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
deleted file mode 100644
index f43b10ebf42e5a..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm64-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
deleted file mode 100644
index fc5bb9a1b20b3d..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
deleted file mode 100644
index 634d0106a2e6ae..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr)
-declare void @print()
-
-define void @f(i1 %cond) #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  br i1 %cond, label %then, label %else
-
-then:
-  call fastcc void @fakeresume1(ptr align 8 null)
-  br label %merge
-
-else:
-  br label %merge
-
-merge:
-  %v0 = phi i1 [0, %then], [1, %else]
-  br label %compare
-
-compare:
-  %cond.cmp = icmp eq i1 %v0, 0
-  br i1 %cond.cmp, label %ready, label %prepare
-
-prepare:
-  call void @print()
-  br label %ready
-
-ready:
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK-NOT:      }
-; CHECK:          call void @print()
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
deleted file mode 100644
index 2f9a14c9010719..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr)
-declare void @may_throw(ptr)
-declare void @print()
-
-define void @f(i1 %cond) #0 personality i32 3 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  call fastcc void @fakeresume1(ptr align 8 null)
-  invoke void @may_throw(ptr null)
-    to label %ready unwind label %lpad
-
-ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-lpad:
-  %lpval = landingpad { ptr, i32 }
-     cleanup
-
-  %need.resume = call i1 @llvm.coro.end(ptr null, i1 true, token none)
-  resume { ptr, i32 } %lpval
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK-NOT:          musttail call fastcc void @fakeresume1(
-; CHECK:     }
-
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
deleted file mode 100644
index 61b61a200e704d..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @fakeresume1(ptr)  {
-entry:
-  ret void;
-}
-
-define void @fakeresume2(ptr align 8)  {
-entry:
-  ret void;
-}
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume2(ptr align 8 null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @g(
-; CHECK-NOT: musttail call fastcc void @fakeresume1(ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
deleted file mode 100644
index 82176b8085e6c7..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  %cmp = icmp eq i8 %suspend, 0
-  br i1 %cmp, label %await.suspend, label %exit
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %br0 = call i8 @switch_result()
-  switch i8 %br0, label %unreach [
-    i8 0, label %await.resume3
-    i8 1, label %await.resume1
-    i8 2, label %await.resume2
-  ]
-await.resume1:
-  %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
-  br label %final.suspend
-await.resume2:
-  %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
-  br label %final.suspend
-await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
-  br label %final.suspend
-final.suspend:
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  %cmp2 = icmp eq i8 %suspend2, 0
-  br i1 %cmp2, label %pre.exit, label %exit
-pre.exit:
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-unreach:
-  unreachable
-}
-
-; Verify that in the initial function resume is not marked with musttail.
-; CHECK-LABEL: @f(
-; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; CHECK-NEXT: ret void
-; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK: musttail call fastcc void %[[addr4]](ptr null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare i8 @switch_result()
-declare ptr @g()
-declare ptr @h()
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
deleted file mode 100644
index be70fc4b51f1db..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-define void @fakeresume1(ptr)  {
-entry:
-  ret void;
-}
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-
-  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %init_suspend, label %coro.end [
-    i8 0, label %await.ready
-    i8 1, label %coro.end
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
-  %switch = icmp ult i8 %suspend, 2
-  br i1 %switch, label %cleanup, label %coro.end
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %coro.end, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %coro.end
-
-coro.end:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK-LABEL: @f.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(
-; CHECK-NEXT:     ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
deleted file mode 100644
index 3e5bddd8e13112..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i8
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:     ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
deleted file mode 100644
index 4359d5305d4d91..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-; It has a cleanup bb.
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %cleanup
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %exit, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %exit
-
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT: ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
deleted file mode 100644
index 2a14be0f921806..00000000000000
--- a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
+++ /dev/null
@@ -1,115 +0,0 @@
-; Tests that instrumentation doesn't interfere with lowering (coro-split).
-; It should convert coro.resume followed by a suspend to a musttail call.
-
-; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
-; is that this contains dead instruction generated during the transformation,
-; which makes the optimization harder.
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-declare void @fakeresume1(ptr align 8)
-
-define void @g() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @g.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:    ret void
-
-; It has a cleanup bb.
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %alloc.var = alloca i64
-  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.suspend
-    i8 1, label %exit
-  ]
-await.suspend:
-  %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %cleanup
-  ]
-await.ready:
-  call void @consume(ptr %alloc.var)
-  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
-  br label %exit
-
-cleanup:
-  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
-  %.not = icmp eq ptr %free.handle, null
-  br i1 %.not, label %exit, label %coro.free
-
-coro.free:
-  call void @delete(ptr nonnull %free.handle) #2
-  br label %exit
-
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; Verify that in the resume part resume call is marked with musttail.
-; CHECK-LABEL: @f.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
-; CHECK-NEXT:    ret void
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-declare void @delete(ptr nonnull) #2
-declare void @consume(ptr)
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
-
-attributes #0 = { presplitcoroutine }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
index fbc4a2c006f84e..fd3b7bd815300c 100644
--- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower-invoke.ll
@@ -58,14 +58,13 @@ suspend.cond:
 ; CHECK-NEXT:       to label %[[STEP2_CONT:[^ ]+]] unwind label %[[PAD]]
 step2:
   %save2 = call token @llvm.coro.save(ptr null)
-  %resume.handle = invoke ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
+  invoke void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
     to label %step2.continue unwind label %pad
 
 ; CHECK:      [[STEP2_CONT]]:
 ; CHECK-NEXT:   %[[NEXT_RESUME:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[NEXT_HDL]], i8 0)
 ; CHECK-NEXT:   musttail call {{.*}} void %[[NEXT_RESUME]](ptr %[[NEXT_HDL]])
 step2.continue:
-  call void @llvm.coro.resume(ptr %resume.handle)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %ret [
     i8 0, label %step3
@@ -112,7 +111,7 @@ declare i1 @llvm.coro.alloc(token)
 declare ptr @llvm.coro.begin(token, ptr)
 declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
 declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
-declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
 declare i1 @llvm.coro.end(ptr, i1, token)
 
 declare ptr @__cxa_begin_catch(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
index 0f574c4acc26e7..8d019e6954628b 100644
--- a/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
+++ b/llvm/test/Transforms/Coroutines/coro-await-suspend-lower.ll
@@ -49,8 +49,7 @@ suspend.cond:
 ; CHECK-NEXT:   musttail call {{.*}} void %[[CONT]](ptr %[[NEXT_HDL]])
 step2:
   %save2 = call token @llvm.coro.save(ptr null)
-  %resume.handle = call ptr @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
-  call void @llvm.coro.resume(ptr %resume.handle)
+  call void @llvm.coro.await.suspend.handle(ptr %awaiter, ptr %hdl, ptr @await_suspend_wrapper_handle)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %ret [
     i8 0, label %step3
@@ -89,7 +88,7 @@ declare i1 @llvm.coro.alloc(token)
 declare ptr @llvm.coro.begin(token, ptr)
 declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr)
 declare i1 @llvm.coro.await.suspend.bool(ptr, ptr, ptr)
-declare ptr @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
+declare void @llvm.coro.await.suspend.handle(ptr, ptr, ptr)
 declare i1 @llvm.coro.end(ptr, i1, token)
 
 declare noalias ptr @malloc(i32)
diff --git a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll b/llvm/test/Transforms/Coroutines/coro-preserve-final.ll
deleted file mode 100644
index 16eeb84e7915ae..00000000000000
--- a/llvm/test/Transforms/Coroutines/coro-preserve-final.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-%"struct.std::__n4861::noop_coroutine_promise" = type { i8 }
-%struct.Promise = type { %"struct.std::__n4861::coroutine_handle" }
-%"struct.std::__n4861::coroutine_handle" = type { ptr }
-
-define dso_local ptr @_Z5Outerv() #1 {
-entry:
-  %__promise = alloca %struct.Promise, align 8
-  %0 = call token @llvm.coro.id(i32 16, ptr nonnull %__promise, ptr nonnull @_Z5Outerv, ptr null)
-  %1 = call i1 @llvm.coro.alloc(token %0)
-  br i1 %1, label %coro.alloc, label %init.suspend
-
-coro.alloc:                                       ; preds = %entry
-  %2 = tail call i64 @llvm.coro.size.i64()
-  %call = call noalias noundef nonnull ptr @_Znwm(i64 noundef %2) #12
-  br label %init.suspend
-
-init.suspend:                                     ; preds = %entry, %coro.alloc
-  %3 = phi ptr [ null, %entry ], [ %call, %coro.alloc ]
-  %4 = call ptr @llvm.coro.begin(token %0, ptr %3) #13
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %__promise) #3
-  store ptr null, ptr %__promise, align 8
-  %5 = call token @llvm.coro.save(ptr null)
-  %6 = call i8 @llvm.coro.suspend(token %5, i1 false)
-  switch i8 %6, label %coro.ret [
-  i8 0, label %await.suspend
-  i8 1, label %cleanup62
-  ]
-
-await.suspend:                                    ; preds = %init.suspend
-  %7 = call token @llvm.coro.save(ptr null)
-  %8 = call ptr @llvm.coro.subfn.addr(ptr %4, i8 0)
-  call fastcc void %8(ptr %4) #3
-  %9 = call i8 @llvm.coro.suspend(token %7, i1 false)
-  switch i8 %9, label %coro.ret [
-  i8 0, label %await2.suspend
-  i8 1, label %cleanup62
-  ]
-
-await2.suspend:                                   ; preds = %await.suspend
-  %call27 = call ptr @_Z5Innerv() #3
-  %10 = call token @llvm.coro.save(ptr null)
-  %11 = getelementptr inbounds i8, ptr %__promise, i64 -16
-  store ptr %11, ptr %call27, align 8
-  %12 = getelementptr inbounds i8, ptr %call27, i64 -16
-  %13 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 0)
-  call fastcc void %13(ptr nonnull %12) #3
-  %14 = call i8 @llvm.coro.suspend(token %10, i1 false)
-  switch i8 %14, label %coro.ret [
-  i8 0, label %final.suspend
-  i8 1, label %cleanup62
-  ]
-
-final.suspend:                                    ; preds = %await2.suspend
-  %15 = call ptr @llvm.coro.subfn.addr(ptr nonnull %12, i8 1)
-  call fastcc void %15(ptr nonnull %12) #3
-  %16 = call token @llvm.coro.save(ptr null)
-  %retval.sroa.0.0.copyload.i = load ptr, ptr %__promise, align 8
-  %17 = call ptr @llvm.coro.subfn.addr(ptr %retval.sroa.0.0.copyload.i, i8 0)
-  call fastcc void %17(ptr %retval.sroa.0.0.copyload.i) #3
-  %18 = call i8 @llvm.coro.suspend(token %16, i1 true) #13
-  switch i8 %18, label %coro.ret [
-  i8 0, label %final.ready
-  i8 1, label %cleanup62
-  ]
-
-final.ready:                                      ; preds = %final.suspend
-  call void @_Z5_exiti(i32 noundef 1) #14
-  unreachable
-
-cleanup62:                                        ; preds = %await2.suspend, %await.suspend, %init.suspend, %final.suspend
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %__promise) #3
-  %19 = call ptr @llvm.coro.free(token %0, ptr %4)
-  %.not = icmp eq ptr %19, null
-  br i1 %.not, label %coro.ret, label %coro.free
-
-coro.free:                                        ; preds = %cleanup62
-  call void @_ZdlPv(ptr noundef nonnull %19) #3
-  br label %coro.ret
-
-coro.ret:                                         ; preds = %coro.free, %cleanup62, %final.suspend, %await2.suspend, %await.suspend, %init.suspend
-  %20 = call i1 @llvm.coro.end(ptr null, i1 false, token none) #13
-  ret ptr %__promise
-}
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #2
-declare i1 @llvm.coro.alloc(token) #3
-declare dso_local noundef nonnull ptr @_Znwm(i64 noundef) local_unnamed_addr #4
-declare i64 @llvm.coro.size.i64() #5
-declare ptr @llvm.coro.begin(token, ptr writeonly) #3
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6
-declare token @llvm.coro.save(ptr) #7
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6
-declare i8 @llvm.coro.suspend(token, i1) #3
-declare dso_local ptr @_Z5Innerv() local_unnamed_addr #8
-declare dso_local void @_ZdlPv(ptr noundef) local_unnamed_addr #9
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #2
-declare i1 @llvm.coro.end(ptr, i1, token) #3
-declare dso_local void @_Z5_exiti(i32 noundef) local_unnamed_addr #10
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #11
-
-attributes #0 = { mustprogress nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { nounwind presplitcoroutine uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { argmemonly nofree nounwind readonly }
-attributes #3 = { nounwind }
-attributes #4 = { nobuiltin allocsize(0) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #5 = { nofree nosync nounwind readnone }
-attributes #6 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
-attributes #7 = { nomerge nounwind }
-attributes #8 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #9 = { nobuiltin nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #10 = { noreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #11 = { argmemonly nounwind readonly }
-attributes #12 = { nounwind allocsize(0) }
-attributes #13 = { noduplicate }
-attributes #14 = { noreturn nounwind }
-
-; CHECK: define{{.*}}@_Z5Outerv.resume(
-; CHECK: entry.resume:
-; CHECK: switch i2 %index
-; CHECK-NEXT:    i2 0, label %await2.suspend
-; CHECK-NEXT:    i2 1, label %final.suspend
-;
-; CHECK: await2.suspend:
-; CHECK: musttail call
-; CHECK-NEXT: ret void
-;
-; CHECK: final.suspend:
-; CHECK: musttail call
-; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
index ddd293eed2409e..e2ed205f2c2f4f 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
@@ -90,10 +90,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 {
   %25 = getelementptr inbounds { ptr, ptr }, ptr %5, i64 0, i32 1
   store ptr %24, ptr %25, align 8
   %26 = call token @llvm.coro.save(ptr null)
-  %27 = call ptr @await_transform_await_suspend(ptr noundef nonnull align 8 dereferenceable(16) %5, ptr %14)
-  %28 = call ptr @llvm.coro.subfn.addr(ptr %27, i8 0)
-  %29 = ptrtoint ptr %28 to i64
-  call fastcc void %28(ptr %27) #9
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend)
   %30 = call i8 @llvm.coro.suspend(token %26, i1 false)
   switch i8 %30, label %60 [
     i8 0, label %31
@@ -123,9 +120,7 @@ define ptr @f(i32 %0) presplitcoroutine align 32 {
   br i1 %42, label %43, label %46
 
 43:                                               ; preds = %36
-  %44 = call ptr @llvm.coro.subfn.addr(ptr nonnull %14, i8 1)
-  %45 = ptrtoint ptr %44 to i64
-  call fastcc void %44(ptr nonnull %14) #9
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_transform_await_suspend)
   br label %47
 
 46:                                               ; preds = %36
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 825e44471db27a..49589459c92448 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,7 +1,6 @@
-; Tests that coro-split will convert coro.resume followed by a suspend to a
-; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; Tests that coro-split will convert coro.await.suspend.handle to a tail call.
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -20,8 +19,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -40,10 +38,8 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
-; PGO: call void @llvm.instrprof
-; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -57,6 +53,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index d0d11fc4495e48..e6da28cc612ddd 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -28,17 +28,14 @@ await.suspend:
   ]
 await.resume1:
   %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function)
   br label %final.suspend
 await.resume2:
   %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function)
   br label %final.suspend
 await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   br label %final.suspend
 final.suspend:
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
@@ -63,18 +60,18 @@ unreach:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr3]]
 ; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
-; PGO: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK: call ptr @await_suspend_function
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr4]]
 ; CHECK-NEXT: ret void
 
 
@@ -93,6 +90,7 @@ declare ptr @malloc(i64)
 declare i8 @switch_result()
 declare ptr @g()
 declare ptr @h()
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index cdd58b2a084fcd..a96e2472e74ed9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,9 +1,10 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
-; Wasm64 with tail-call support.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; Wasm64 or Wasm32 with tail-call support.
+; RUN: opt -mtriple=wasm64-unknown-unknown < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt -mtriple=wasm64-unknown-unknown < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
-target triple = "wasm64-unknown-unknown"
+; RUN: opt -mtriple=wasm32-unknown-unknown < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt -mtriple=wasm32-unknown-unknown < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @f() #0 {
 entry:
@@ -22,8 +23,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -48,6 +48,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
deleted file mode 100644
index da5d868280e967..00000000000000
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests that we would convert coro.resume to a musttail call if the target is
-; Wasm32 with tail-call support.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
-
-target triple = "wasm32-unknown-unknown"
-
-define void @f() #0 {
-entry:
-  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
-  %alloc = call ptr @malloc(i64 16) #3
-  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
-
-  %save = call token @llvm.coro.save(ptr null)
-  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr1(ptr null)
-
-  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-  switch i8 %suspend, label %exit [
-    i8 0, label %await.ready
-    i8 1, label %exit
-  ]
-await.ready:
-  %save2 = call token @llvm.coro.save(ptr null)
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr2(ptr null)
-
-  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
-  switch i8 %suspend2, label %exit [
-    i8 0, label %exit
-    i8 1, label %exit
-  ]
-exit:
-  call i1 @llvm.coro.end(ptr null, i1 false, token none)
-  ret void
-}
-
-; CHECK: musttail call
-
-declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
-declare i1 @llvm.coro.alloc(token) #2
-declare i64 @llvm.coro.size.i64() #3
-declare ptr @llvm.coro.begin(token, ptr writeonly) #2
-declare token @llvm.coro.save(ptr) #2
-declare ptr @llvm.coro.frame() #3
-declare i8 @llvm.coro.suspend(token, i1) #2
-declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
-declare i1 @llvm.coro.end(ptr, i1, token) #2
-declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
-declare ptr @malloc(i64)
-
-attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
-attributes #1 = { argmemonly nounwind readonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readnone }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 2f27f79480ab1b..e1f794c52dbae8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -8,11 +8,6 @@ entry:
   ret void;
 }
 
-define void @fakeresume2(ptr align 8)  {
-entry:
-  ret void;
-}
-
 define void @g() #0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
@@ -29,7 +24,7 @@ entry:
   ]
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume2(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
 
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
@@ -47,7 +42,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null)
+; CHECK: call ptr @await_suspend_function
+; CHECK-NEXT: call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -61,6 +58,7 @@ declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
 declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index 4778e3dcaf9957..068c6a3619869f 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,7 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
-; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
 define void @f() #0 {
 entry:
@@ -26,17 +26,14 @@ await.suspend:
   ]
 await.resume1:
   %hdl = call ptr @g()
-  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
-  call fastcc void %addr2(ptr %hdl)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl, ptr @await_suspend_function)
   br label %final.suspend
 await.resume2:
   %hdl2 = call ptr @h()
-  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
-  call fastcc void %addr3(ptr %hdl2)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr %hdl2, ptr @await_suspend_function)
   br label %final.suspend
 await.resume3:
-  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-  call fastcc void %addr4(ptr null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   br label %final.suspend
 final.suspend:
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
@@ -59,22 +56,21 @@ unreach:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
-; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
-; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr2]]
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
-; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
-; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: call ptr @await_suspend_function
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr3]]
 ; CHECK-NEXT: ret void
-; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
-; PGO: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK: call ptr @await_suspend_function
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr
+; CHECK-NEXT: musttail call fastcc void %[[addr4]]
 ; CHECK-NEXT: ret void
 
 
-
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
 declare i1 @llvm.coro.alloc(token) #2
 declare i64 @llvm.coro.size.i64() #3
@@ -89,6 +85,7 @@ declare ptr @malloc(i64)
 declare i8 @switch_result()
 declare ptr @g()
 declare ptr @h()
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 00ee422ce5863d..931473d893ecad 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -24,7 +24,7 @@ entry:
 await.ready:
   %save2 = call token @llvm.coro.save(ptr null)
 
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
   %switch = icmp ult i8 %suspend, 2
   br i1 %switch, label %cleanup, label %coro.end
@@ -44,7 +44,7 @@ coro.end:
 }
 
 ; CHECK-LABEL: @f.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(
+; CHECK:          musttail call fastcc void
 ; CHECK-NEXT:     ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -59,6 +59,7 @@ declare i1 @llvm.coro.end(ptr, i1, token) #2
 declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
 declare ptr @malloc(i64)
 declare void @delete(ptr nonnull) #2
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index 9afc79abbe88cd..69cff303995646 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -22,7 +22,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -39,7 +39,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:          musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:          musttail call fastcc void
 ; CHECK-NEXT:     ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -56,6 +56,7 @@ declare ptr @malloc(i64)
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index d9dba92ec4eb7e..971e8e47fc9310 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -25,7 +25,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -42,7 +42,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:      musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 ; It has a cleanup bb.
@@ -63,7 +63,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -90,7 +90,7 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:      musttail call fastcc void
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -108,6 +108,7 @@ declare void @delete(ptr nonnull) #2
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index d0d5005587bda6..b62480a50737c3 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -6,8 +6,6 @@
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
-declare void @fakeresume1(ptr align 8)
-
 define i64 @g() #0 {
 entry:
   %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
@@ -25,7 +23,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
 
   ; These (non-trivially) dead instructions are in the way.
@@ -48,7 +46,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @g.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:         %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null)
+; CHECK:         %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0)
+; CHECK:         musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]])
 ; CHECK-NEXT:    ret void
 
 ; It has a cleanup bb.
@@ -69,7 +69,7 @@ entry:
   ]
 await.suspend:
   %save2 = call token @llvm.coro.save(ptr null)
-  call fastcc void @fakeresume1(ptr align 8 null)
+  call void @llvm.coro.await.suspend.handle(ptr null, ptr null, ptr @await_suspend_function)
   %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
   switch i8 %suspend2, label %exit [
     i8 0, label %await.ready
@@ -96,7 +96,9 @@ exit:
 
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
-; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK:         %[[FRAME:[0-9]+]] = call ptr @await_suspend_function(ptr null, ptr null)
+; CHECK:         %[[RESUMEADDR:[0-9]+]] = call ptr @llvm.coro.subfn.addr(ptr %[[FRAME]], i8 0)
+; CHECK:         musttail call fastcc void %[[RESUMEADDR]](ptr %[[FRAME]])
 ; CHECK-NEXT:    ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
@@ -114,6 +116,7 @@ declare void @delete(ptr nonnull) #2
 declare void @consume(ptr)
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare ptr @await_suspend_function(ptr %awaiter, ptr %hdl)
 
 attributes #0 = { presplitcoroutine }
 attributes #1 = { argmemonly nounwind readonly }

>From 6d23b16e2d8aeebffcaf13d23ffd0acdb7df1a46 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Tue, 23 Apr 2024 14:50:42 +0200
Subject: [PATCH 2/6] format

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 5 +++--
 llvm/lib/Transforms/Coroutines/Coroutines.cpp | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 01eb75617e39fe..ff037518cb302e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -212,7 +212,8 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
 
   if (CB->getCalledFunction()->getIntrinsicID() ==
       Intrinsic::coro_await_suspend_handle) {
-    // Follow the await_suspend by a lowered resume call to the returned coroutine.
+    // Follow the await_suspend by a lowered resume call to the returned
+    // coroutine.
     if (auto *Invoke = dyn_cast<InvokeInst>(CB))
       Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt());
 
@@ -220,7 +221,7 @@ static void lowerAwaitSuspend(IRBuilder<> &Builder, CoroAwaitSuspendInst *CB,
     auto *ResumeAddr = LB.makeSubFnCall(NewCall, CoroSubFnInst::ResumeIndex,
                                         &*Builder.GetInsertPoint());
 
-    LLVMContext& Ctx = Builder.getContext();
+    LLVMContext &Ctx = Builder.getContext();
     FunctionType *ResumeTy = FunctionType::get(
         Type::getVoidTy(Ctx), PointerType::getUnqual(Ctx), false);
     auto *ResumeCall = Builder.CreateCall(ResumeTy, ResumeAddr, {NewCall});
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index d891173156b2af..1a92bc1636257b 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -48,7 +48,7 @@ coro::LowererBase::LowererBase(Module &M)
 //    call ptr @llvm.coro.subfn.addr(ptr %Arg, i8 %index)
 
 CallInst *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
-                                        Instruction *InsertPt) {
+                                           Instruction *InsertPt) {
   auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index);
   auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr);
 

>From 862b5d1226d3c4d298bd25d95d15b16046910e6d Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 24 Apr 2024 14:10:52 +0200
Subject: [PATCH 3/6] reinstate fixme

---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ff037518cb302e..72c324c82994bc 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1087,8 +1087,11 @@ void CoroCloner::create() {
   for (CallInst *ResumeCall : Shape.SymmetricTransfers) {
     ResumeCall = cast<CallInst>(VMap[ResumeCall]);
     ResumeCall->setCallingConv(NewF->getCallingConv());
-    if (TTI.supportsTailCallFor(ResumeCall))
+    if (TTI.supportsTailCallFor(ResumeCall)) {
+      // FIXME: Could we support symmetric transfer effectively without
+      // musttail?
       ResumeCall->setTailCallKind(CallInst::TCK_MustTail);
+    }
 
     // Put a 'ret void' after the call, and split any remaining instructions to
     // an unreachable block.

>From 7192d93fe8172093833ce2887bd13ec0985f617c Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Wed, 24 Apr 2024 14:35:30 +0200
Subject: [PATCH 4/6] llvm.coro.await.suspend.handle calls must not be nounwind

---
 clang/lib/CodeGen/CGCoroutine.cpp           | 6 +++++-
 clang/test/CodeGenCoroutines/coro-await.cpp | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index e976734898b9b8..fb2994fd35e5a5 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -278,7 +278,11 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
 
   llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);
 
-  const auto AwaitSuspendCanThrow = StmtCanThrow(S.getSuspendExpr());
+  // SuspendHandle might throw since it also resumes the returned handle.
+  const bool AwaitSuspendCanThrow =
+      SuspendReturnType ==
+          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
+      StmtCanThrow(S.getSuspendExpr());
 
   llvm::CallBase *SuspendRet = nullptr;
   // FIXME: add call attributes?
diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 7caaa6351844b2..b92777dfa28a6c 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -370,7 +370,8 @@ extern "C" void TestTailcall() {
   // ---------------------------
   // Call coro.await.suspend
   // ---------------------------
-  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await)
+  // Note: The call must not be nounwind since the resumed function could throw.
+  // CHECK-NEXT: call void @llvm.coro.await.suspend.handle(ptr %[[AWAITABLE]], ptr %[[FRAME]], ptr @__await_suspend_wrapper_TestTailcall_await){{$}}
   // CHECK-NEXT: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false)
   // CHECK-NEXT: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [
   // CHECK-NEXT:   i8 0, label %[[READY_BB]]

>From 8fac76b2692be09216dcaf5cb4aecfa856f7cc44 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Mon, 29 Apr 2024 16:07:06 +0200
Subject: [PATCH 5/6] remove clang/.../coro-symmetric-transfer-01.cpp

It was testing that "the lifetime of the coroutine handle used to obtain
the address is contained within single basic block, and hence does not
live across suspension points"

Since we no longer separately obtain the handle and pass it to
@llvm.coro.resume, this test doesn't really test anything.
---
 .../coro-symmetric-transfer-01.cpp            | 54 -------------------
 1 file changed, 54 deletions(-)
 delete mode 100644 clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp

diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
deleted file mode 100644
index 0ae672de391c47..00000000000000
--- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s
-// RUN: %clang -std=c++20 -O0 -emit-llvm -c  %s -o %t -Xclang -disable-llvm-passes && %clang -c %t
-
-#include "Inputs/coroutine.h"
-
-struct detached_task {
-  struct promise_type {
-    detached_task get_return_object() noexcept {
-      return detached_task{std::coroutine_handle<promise_type>::from_promise(*this)};
-    }
-
-    void return_void() noexcept {}
-
-    struct final_awaiter {
-      bool await_ready() noexcept { return false; }
-      std::coroutine_handle<> await_suspend(std::coroutine_handle<promise_type> h) noexcept {
-        h.destroy();
-        return {};
-      }
-      void await_resume() noexcept {}
-    };
-
-    void unhandled_exception() noexcept {}
-
-    final_awaiter final_suspend() noexcept { return {}; }
-
-    std::suspend_always initial_suspend() noexcept { return {}; }
-  };
-
-  ~detached_task() {
-    if (coro_) {
-      coro_.destroy();
-      coro_ = {};
-    }
-  }
-
-  void start() && {
-    auto tmp = coro_;
-    coro_ = {};
-    tmp.resume();
-  }
-
-  std::coroutine_handle<promise_type> coro_;
-};
-
-detached_task foo() {
-  co_return;
-}
-
-// check that the lifetime of the coroutine handle used to obtain the address is contained within single basic block, and hence does not live across suspension points.
-// XXX: not sure this makes sense anymore?
-// CHECK-LABEL: final.suspend:
-// CHECK:         %{{.+}} = call token @llvm.coro.save(ptr null)
-// CHECK:         call void @llvm.coro.await.suspend.handle

>From 17d0967c43c0c244bbae43828ca4d6c9fd04e16f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans at chromium.org>
Date: Mon, 29 Apr 2024 16:33:09 +0200
Subject: [PATCH 6/6] tweaks

---
 llvm/test/Transforms/Coroutines/coro-split-musttail.ll   | 2 +-
 llvm/test/Transforms/Coroutines/coro-split-musttail1.ll  | 3 +--
 llvm/test/Transforms/Coroutines/coro-split-musttail10.ll | 2 +-
 llvm/test/Transforms/Coroutines/coro-split-musttail2.ll  | 3 +--
 llvm/test/Transforms/Coroutines/coro-split-musttail3.ll  | 3 +--
 llvm/test/Transforms/Coroutines/coro-split-musttail4.ll  | 3 +--
 llvm/test/Transforms/Coroutines/coro-split-musttail5.ll  | 3 +--
 llvm/test/Transforms/Coroutines/coro-split-musttail6.ll  | 2 +-
 llvm/test/Transforms/Coroutines/coro-split-musttail7.ll  | 2 +-
 9 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 49589459c92448..70f29f4a9a4dc4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,4 +1,4 @@
-; Tests that coro-split will convert coro.await.suspend.handle to a tail call.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index e6da28cc612ddd..3edb8728d8550b 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,5 +1,4 @@
-; Tests that coro-split will convert coro.resume followed by a suspend to a
-; musttail call.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index a4d32d960c12ea..a55b3d16e2ded3 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,4 +1,4 @@
-; Tests that we would convert coro.resume to a musttail call if the target is
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call if the target is
 ; Wasm64 or Wasm32 with tail-call support.
 ; REQUIRES: webassembly-registered-target
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index e1f794c52dbae8..ca1611e19b9f94 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,5 +1,4 @@
-; Tests that coro-split will convert coro.resume followed by a suspend to a
-; musttail call.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index 068c6a3619869f..84cdac17beebb1 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,5 +1,4 @@
-; Tests that coro-split will convert coro.resume followed by a suspend to a
-; musttail call.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 931473d893ecad..b647bd2e4a207f 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -1,5 +1,4 @@
-; Tests that coro-split will convert a call before coro.suspend to a musttail call
-; while the user of the coro.suspend is a icmpinst.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index 69cff303995646..7c1a13fd83cec5 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -1,5 +1,4 @@
-; Tests that sinked lifetime markers wouldn't provent optimization
-; to convert a resuming call to a musttail call.
+; Tests that coro-split will convert coro.await.suspend.handle to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 ; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index 971e8e47fc9310..e05169a7291680 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -1,5 +1,5 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
-; to convert a resuming call to a musttail call.
+; to convert a coro.await.suspend.handle call to a musttail call.
 ; The difference between this and coro-split-musttail5.ll is that there is
 ; an extra bitcast instruction in the path, which makes it harder to
 ; optimize.
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index b62480a50737c3..8ceb0dda94f6ac 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -1,5 +1,5 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
-; to convert a resuming call to a musttail call.
+; to convert a coro.await.suspend.handle call to a musttail call.
 ; The difference between this and coro-split-musttail5.ll and coro-split-musttail6.ll
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.



More information about the cfe-commits mailing list