[llvm] ffd337b - [coro][pgo] Do not insert counters in the `suspend` block (#71262)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 15 11:13:05 PST 2023


Author: Mircea Trofin
Date: 2023-11-15T11:12:59-08:00
New Revision: ffd337b995658266d83e15630d656e57882bcf0d

URL: https://github.com/llvm/llvm-project/commit/ffd337b995658266d83e15630d656e57882bcf0d
DIFF: https://github.com/llvm/llvm-project/commit/ffd337b995658266d83e15630d656e57882bcf0d.diff

LOG: [coro][pgo] Do not insert counters in the `suspend` block (#71262)

If we did, we couldn't lower symmetric transfer resumes to tail calls.

We can instrument the other 2 edges instead, as long as they also don't
point to the same basic block.

Added: 
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
    llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll

Modified: 
    llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
    llvm/test/Transforms/Coroutines/coro-split-musttail.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
    llvm/test/Transforms/Coroutines/coro-split-musttail7.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index ea90fa89e04eaef..cd2ae61334d0f05 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -82,6 +84,38 @@ template <class Edge, class BBInfo> class CFGMST {
     return true;
   }
 
+  void handleCoroSuspendEdge(Edge *E) {
+    // We must not add instrumentation to the BB representing the
+    // "suspend" path, else CoroSplit won't be able to lower
+    // llvm.coro.suspend to a tail call. We do want profiling info for
+    // the other branches (resume/destroy). So, in a presplit coroutine,
+    // an edge from a switch on llvm.coro.suspend to the switch's default
+    // destination is marked "Removed" to guarantee it is not considered
+    // for instrumentation. The edge weight is not adjusted here. Only the
+    // destination block is compared, so a case that shares the default
+    // block (%exit below) matches as well. Example, from
+    // test/Transforms/Coroutines/coro-split-musttail.ll:
+    // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    // switch i8 %suspend, label %exit [
+    //   i8 0, label %await.ready
+    //   i8 1, label %exit
+    // ]
+    const BasicBlock *EdgeTarget = E->DestBB;
+    if (!EdgeTarget)
+      return;
+    assert(E->SrcBB);
+    const Function *F = EdgeTarget->getParent();
+    if (!F->isPresplitCoroutine())
+      return;
+
+    const Instruction *TI = E->SrcBB->getTerminator();
+    if (auto *SWInst = dyn_cast<SwitchInst>(TI))
+      if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
+        if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend &&
+            SWInst->getDefaultDest() == EdgeTarget)
+          E->Removed = true;
+  }
+
   // Traverse the CFG using a stack. Find all the edges and assign the weight.
   // Edges with large weight will be put into MST first so they are less likely
   // to be instrumented.
@@ -133,6 +167,7 @@ template <class Edge, class BBInfo> class CFGMST {
             Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
           E->IsCritical = Critical;
+          handleCoroSuspendEdge(E);
           LLVM_DEBUG(dbgs() << "  Edge: from " << BB.getName() << " to "
                             << TargetBB->getName() << "  w=" << Weight << "\n");
 

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
new file mode 100644
index 000000000000000..a7321833d74843b
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail.ll
@@ -0,0 +1,63 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr2(ptr null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; Verify that in the initial function resume is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK: call void @llvm.instrprof
+; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; CHECK-NEXT: ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
new file mode 100644
index 000000000000000..6098dee9a58035a
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail1.ll
@@ -0,0 +1,97 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %br0 = call i8 @switch_result()
+  switch i8 %br0, label %unreach [
+    i8 0, label %await.resume3
+    i8 1, label %await.resume1
+    i8 2, label %await.resume2
+  ]
+await.resume1:
+  %hdl = call ptr @g()
+  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
+  call fastcc void %addr2(ptr %hdl)
+  br label %final.suspend
+await.resume2:
+  %hdl2 = call ptr @h()
+  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
+  call fastcc void %addr3(ptr %hdl2)
+  br label %final.suspend
+await.resume3:
+  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr4(ptr null)
+  br label %final.suspend
+final.suspend:
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %pre.exit
+    i8 1, label %exit
+  ]
+pre.exit:
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+unreach:
+  unreachable
+}
+
+; Verify that in the initial function resume is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[hdl:.+]] = call ptr @g()
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
+; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: ret void
+; CHECK: %[[hdl2:.+]] = call ptr @h()
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
+; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: ret void
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK-NEXT: ret void
+
+
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare i8 @switch_result()
+declare ptr @g()
+declare ptr @h()
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
new file mode 100644
index 000000000000000..f43b10ebf42e5a3
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail10.ll
@@ -0,0 +1,55 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm64-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr2(ptr null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; CHECK: musttail call
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+
+attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
new file mode 100644
index 000000000000000..fc5bb9a1b20b3de
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail11.ll
@@ -0,0 +1,55 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr2(ptr null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; CHECK: musttail call
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+
+attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
new file mode 100644
index 000000000000000..634d0106a2e6aea
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail12.ll
@@ -0,0 +1,85 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; Here the check is that the call to @print is still present in @f.resume.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare void @fakeresume1(ptr)
+declare void @print()
+
+define void @f(i1 %cond) #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+
+  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %init_suspend, label %coro.end [
+    i8 0, label %await.ready
+    i8 1, label %coro.end
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  br i1 %cond, label %then, label %else
+
+then:
+  call fastcc void @fakeresume1(ptr align 8 null)
+  br label %merge
+
+else:
+  br label %merge
+
+merge:
+  %v0 = phi i1 [0, %then], [1, %else]
+  br label %compare
+
+compare:
+  %cond.cmp = icmp eq i1 %v0, 0
+  br i1 %cond.cmp, label %ready, label %prepare
+
+prepare:
+  call void @print()
+  br label %ready
+
+ready:
+  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
+  %switch = icmp ult i8 %suspend, 2
+  br i1 %switch, label %cleanup, label %coro.end
+
+cleanup:
+  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
+  %.not = icmp eq ptr %free.handle, null
+  br i1 %.not, label %coro.end, label %coro.free
+
+coro.free:
+  call void @delete(ptr nonnull %free.handle) #2
+  br label %coro.end
+
+coro.end:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; CHECK-LABEL: @f.resume(
+; CHECK-NOT:      }
+; CHECK:          call void @print()
+
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @delete(ptr nonnull) #2
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
new file mode 100644
index 000000000000000..2f9a14c90107195
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail13.ll
@@ -0,0 +1,76 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; Here an invoke follows the resume call, so @f.resume must not contain a musttail call to @fakeresume1.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare void @fakeresume1(ptr)
+declare void @may_throw(ptr)
+declare void @print()
+
+define void @f(i1 %cond) #0 personality i32 3 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+
+  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %init_suspend, label %coro.end [
+    i8 0, label %await.ready
+    i8 1, label %coro.end
+  ]
+await.ready:
+  call fastcc void @fakeresume1(ptr align 8 null)
+  invoke void @may_throw(ptr null)
+    to label %ready unwind label %lpad
+
+ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
+  %switch = icmp ult i8 %suspend, 2
+  br i1 %switch, label %cleanup, label %coro.end
+
+cleanup:
+  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
+  %.not = icmp eq ptr %free.handle, null
+  br i1 %.not, label %coro.end, label %coro.free
+
+lpad:
+  %lpval = landingpad { ptr, i32 }
+     cleanup
+
+  %need.resume = call i1 @llvm.coro.end(ptr null, i1 true, token none)
+  resume { ptr, i32 } %lpval
+
+coro.free:
+  call void @delete(ptr nonnull %free.handle) #2
+  br label %coro.end
+
+coro.end:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; CHECK-LABEL: @f.resume(
+; CHECK-NOT:          musttail call fastcc void @fakeresume1(
+; CHECK:     }
+
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @delete(ptr nonnull) #2
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
new file mode 100644
index 000000000000000..61b61a200e704d5
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail2.ll
@@ -0,0 +1,68 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define void @fakeresume1(ptr)  {
+entry:
+  ret void;
+}
+
+define void @fakeresume2(ptr align 8)  {
+entry:
+  ret void;
+}
+
+define void @g() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume2(ptr align 8 null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; Verify that in the initial function resume is not marked with musttail.
+; CHECK-LABEL: @g(
+; CHECK-NOT: musttail call fastcc void @fakeresume1(ptr null)
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @g.resume(
+; CHECK: musttail call fastcc void @fakeresume2(ptr align 8 null)
+; CHECK-NEXT: ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
new file mode 100644
index 000000000000000..82176b8085e6c7b
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail3.ll
@@ -0,0 +1,91 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr1(ptr null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  %cmp = icmp eq i8 %suspend, 0
+  br i1 %cmp, label %await.suspend, label %exit
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  %br0 = call i8 @switch_result()
+  switch i8 %br0, label %unreach [
+    i8 0, label %await.resume3
+    i8 1, label %await.resume1
+    i8 2, label %await.resume2
+  ]
+await.resume1:
+  %hdl = call ptr @g()
+  %addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
+  call fastcc void %addr2(ptr %hdl)
+  br label %final.suspend
+await.resume2:
+  %hdl2 = call ptr @h()
+  %addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
+  call fastcc void %addr3(ptr %hdl2)
+  br label %final.suspend
+await.resume3:
+  %addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+  call fastcc void %addr4(ptr null)
+  br label %final.suspend
+final.suspend:
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  %cmp2 = icmp eq i8 %suspend2, 0
+  br i1 %cmp2, label %pre.exit, label %exit
+pre.exit:
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+unreach:
+  unreachable
+}
+
+; Verify that in the initial function resume is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[hdl:.+]] = call ptr @g()
+; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
+; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; CHECK-NEXT: ret void
+; CHECK: %[[hdl2:.+]] = call ptr @h()
+; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
+; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; CHECK-NEXT: ret void
+; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
+; CHECK: musttail call fastcc void %[[addr4]](ptr null)
+; CHECK-NEXT: ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare i8 @switch_result()
+declare ptr @g()
+declare ptr @h()
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
new file mode 100644
index 000000000000000..be70fc4b51f1db4
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail4.ll
@@ -0,0 +1,66 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define void @fakeresume1(ptr)  {
+entry:
+  ret void;
+}
+
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+
+  %init_suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %init_suspend, label %coro.end [
+    i8 0, label %await.ready
+    i8 1, label %coro.end
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(ptr null)
+
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend = call i8 @llvm.coro.suspend(token %save2, i1 true)
+  %switch = icmp ult i8 %suspend, 2
+  br i1 %switch, label %cleanup, label %coro.end
+
+cleanup:
+  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
+  %.not = icmp eq ptr %free.handle, null
+  br i1 %.not, label %coro.end, label %coro.free
+
+coro.free:
+  call void @delete(ptr nonnull %free.handle) #2
+  br label %coro.end
+
+coro.end:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; CHECK-LABEL: @f.resume(
+; CHECK:          musttail call fastcc void @fakeresume1(
+; CHECK-NEXT:     ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @delete(ptr nonnull) #2
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff  --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
new file mode 100644
index 000000000000000..3e5bddd8e13112d
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail5.ll
@@ -0,0 +1,63 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare void @fakeresume1(ptr align 8)
+
+define void @g() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %alloc.var = alloca i8
+  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  call void @consume(ptr %alloc.var)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @g.resume(
+; CHECK:          musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK-NEXT:     ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @consume(ptr)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
new file mode 100644
index 000000000000000..5d068872fcace07
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail6.ll
@@ -0,0 +1,113 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare void @fakeresume1(ptr align 8)
+
+define void @g() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %alloc.var = alloca i64
+  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  call void @consume(ptr %alloc.var)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @g.resume(
+; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK-NEXT: ret void
+
+; It has a cleanup bb.
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %alloc.var = alloca i64
+  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %cleanup
+  ]
+await.ready:
+  call void @consume(ptr %alloc.var)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
+  br label %exit
+
+cleanup:
+  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
+  %.not = icmp eq ptr %free.handle, null
+  br i1 %.not, label %exit, label %coro.free
+
+coro.free:
+  call void @delete(ptr nonnull %free.handle) #2
+  br label %exit
+
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; FIXME: The fakeresume1 here should be marked as musttail.
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK:      musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK-NEXT: ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @delete(ptr nonnull) #2
+declare void @consume(ptr)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff --git a/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
new file mode 100644
index 000000000000000..6ea81c6ff0b0961
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/Coro/coro-split-musttail7.ll
@@ -0,0 +1,116 @@
+; Tests that instrumentation doesn't interfere with lowering (coro-split).
+; It should convert coro.resume followed by a suspend to a musttail call.
+
+; The difference between this and coro-split-musttail5.ll and coro-split-musttail5.ll
+; is that this contains dead instruction generated during the transformation,
+; which makes the optimization harder.
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+declare void @fakeresume1(ptr align 8)
+
+define void @g() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %alloc.var = alloca i64
+  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  call void @consume(ptr %alloc.var)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @g.resume(
+; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK-NEXT:    ret void
+
+; It has a cleanup bb.
+define void @f() #0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %alloc = call ptr @malloc(i64 16) #3
+  %alloc.var = alloca i64
+  call void @llvm.lifetime.start.p0(i64 1, ptr %alloc.var)
+  %vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
+
+  %save = call token @llvm.coro.save(ptr null)
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.suspend
+    i8 1, label %exit
+  ]
+await.suspend:
+  %save2 = call token @llvm.coro.save(ptr null)
+  call fastcc void @fakeresume1(ptr align 8 null)
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %cleanup
+  ]
+await.ready:
+  call void @consume(ptr %alloc.var)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %alloc.var)
+  br label %exit
+
+cleanup:
+  %free.handle = call ptr @llvm.coro.free(token %id, ptr %vFrame)
+  %.not = icmp eq ptr %free.handle, null
+  br i1 %.not, label %exit, label %coro.free
+
+coro.free:
+  call void @delete(ptr nonnull %free.handle) #2
+  br label %exit
+
+exit:
+  call i1 @llvm.coro.end(ptr null, i1 false, token none)
+  ret void
+}
+
+; FIXME: The fakeresume1 here should be marked as musttail.
+; Verify that in the resume part resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK:         musttail call fastcc void @fakeresume1(ptr align 8 null)
+; CHECK-NEXT:    ret void
+
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
+declare i1 @llvm.coro.alloc(token) #2
+declare i64 @llvm.coro.size.i64() #3
+declare ptr @llvm.coro.begin(token, ptr writeonly) #2
+declare token @llvm.coro.save(ptr) #2
+declare ptr @llvm.coro.frame() #3
+declare i8 @llvm.coro.suspend(token, i1) #2
+declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
+declare i1 @llvm.coro.end(ptr, i1, token) #2
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
+declare ptr @malloc(i64)
+declare void @delete(ptr nonnull) #2
+declare void @consume(ptr)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { argmemonly nounwind readonly }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 0406135687904bf..825e44471db27ae 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -40,7 +41,9 @@ exit:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; PGO: call void @llvm.instrprof
+; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index cd1635b93d2cc24..d0d11fc4495e480 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -63,14 +64,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index 9d73c8bbc57b81a..cdd58b2a084fcd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm64 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm64-unknown-unknown"
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
index 9bc5b4f0c65d91e..da5d868280e9671 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm32 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
index e7f4bcb9b0ff29a..5baec378876bb1e 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't convert the cmp instruction prematurely.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @print()

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
index 2384f9382685bd0..0290e42339e2ad4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @may_throw(ptr)

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 38fc12815c033e7..2f27f79480ab1b4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index b777f000e33a6d3..4778e3dcaf9957b 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -59,14 +60,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 1e0fcdb87a72d30..00ee422ce5863df 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert a call before coro.suspend to a musttail call
 ; while the user of the coro.suspend is a icmpinst.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index d19606491335e50..9afc79abbe88cd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -1,6 +1,7 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
 ; to convert a resuming call to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index eea711861c488c5..9c2b1ece1624bc9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -4,6 +4,7 @@
 ; an extra bitcast instruction in the path, which makes it harder to
 ; optimize.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 

diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index c32fe9b0ee304c2..860032bd3cf8e52 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -4,6 +4,7 @@
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 


        


More information about the llvm-commits mailing list