[llvm] [coro][pgo] Don't promote pgo counters in the suspend basic block (PR #71263)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 3 18:53:12 PDT 2023


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/71263

>From 41ecae9e95412eebfc45d580e1eead398fa9ec4a Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 3 Nov 2023 18:19:15 -0700
Subject: [PATCH 1/2] [coro][pgo] Do not insert counters in the `suspend` block

If we do, we can't lower the suspend call to a tail call. If this
happens in a loop, it can lead to stack overflow (this was encountered
in a benchmark, as an extreme case).

We can instrument the other 2 edges instead, as long as they don't also
point to the suspend basic block.
---
 .../llvm/Transforms/Instrumentation/CFGMST.h  | 67 +++++++++++++++----
 .../Coroutines/coro-split-musttail.ll         |  7 +-
 .../Coroutines/coro-split-musttail1.ll        | 12 ++--
 .../Coroutines/coro-split-musttail10.ll       |  1 +
 .../Coroutines/coro-split-musttail11.ll       |  1 +
 .../Coroutines/coro-split-musttail12.ll       |  1 +
 .../Coroutines/coro-split-musttail13.ll       |  1 +
 .../Coroutines/coro-split-musttail2.ll        |  1 +
 .../Coroutines/coro-split-musttail3.ll        | 12 ++--
 .../Coroutines/coro-split-musttail4.ll        |  1 +
 .../Coroutines/coro-split-musttail5.ll        |  1 +
 .../Coroutines/coro-split-musttail6.ll        |  1 +
 .../Coroutines/coro-split-musttail7.ll        |  1 +
 13 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 6ed8a6c6eaf0197..eddfbd8a8e45b7a 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST {
 
     static const uint32_t CriticalEdgeMultiplier = 1000;
 
+    auto GetCoroSuspendSwitch =
+        [&](const Instruction *TI) -> const SwitchInst * {
+      if (!F.isPresplitCoroutine())
+        return nullptr;
+      if (auto *SWInst = dyn_cast<SwitchInst>(TI))
+        if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
+          if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
+            return SWInst;
+      return nullptr;
+    };
+
     for (BasicBlock &BB : F) {
       Instruction *TI = BB.getTerminator();
+      const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
       uint64_t BBWeight =
           (BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
       uint64_t Weight = 2;
       if (int successors = TI->getNumSuccessors()) {
         for (int i = 0; i != successors; ++i) {
           BasicBlock *TargetBB = TI->getSuccessor(i);
-          bool Critical = isCriticalEdge(TI, i);
-          uint64_t scaleFactor = BBWeight;
-          if (Critical) {
-            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
-              scaleFactor *= CriticalEdgeMultiplier;
-            else
-              scaleFactor = UINT64_MAX;
+          const bool Critical = isCriticalEdge(TI, i);
+          const bool IsCoroSuspendTarget =
+              CoroSuspendSwitch &&
+              CoroSuspendSwitch->getDefaultDest() == TargetBB;
+          // We must not add instrumentation to the BB representing the
+          // "suspend" path, else CoroSplit won't be able to lower
+          // llvm.coro.suspend to a tail call. We do want profiling info for
+          // the other branches (resume/destroy). So we do 2 things:
+          // 1. we prefer instrumenting those other edges by setting the weight
+          //    of the "suspend" edge to max, and
+          // 2. we mark the edge as "Removed" to guarantee it is not considered
+          //    for instrumentation. That could technically happen:
+          //    (from test/Transforms/Coroutines/coro-split-musttail.ll)
+          //
+          // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+          // switch i8 %suspend, label %exit [
+          //   i8 0, label %await.ready
+          //   i8 1, label %exit
+          // ]
+          if (IsCoroSuspendTarget) {
+            Weight = UINT64_MAX;
+          } else {
+            bool Critical = isCriticalEdge(TI, i);
+            uint64_t scaleFactor = BBWeight;
+            if (Critical) {
+              if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+                scaleFactor *= CriticalEdgeMultiplier;
+              else
+                scaleFactor = UINT64_MAX;
+            }
+            if (BPI != nullptr)
+              Weight =
+                  BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+            if (Weight == 0)
+              Weight++;
           }
-          if (BPI != nullptr)
-            Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
-          if (Weight == 0)
-            Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
           E->IsCritical = Critical;
+          // See comment above - we must guarantee the coro suspend BB isn't
+          // instrumented.
+          if (IsCoroSuspendTarget)
+            E->Removed = true;
           LLVM_DEBUG(dbgs() << "  Edge: from " << BB.getName() << " to "
-                            << TargetBB->getName() << "  w=" << Weight << "\n");
-
+                          << TargetBB->getName() << "  w=" << Weight << "\n");          
           // Keep track of entry/exit edges:
           if (&BB == Entry) {
             if (Weight > MaxEntryOutWeight) {
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 0406135687904bf..825e44471db27ae 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -40,7 +41,9 @@ exit:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; PGO: call void @llvm.instrprof
+; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index cd1635b93d2cc24..d0d11fc4495e480 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -63,14 +64,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index 9d73c8bbc57b81a..cdd58b2a084fcd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm64 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm64-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
index 9bc5b4f0c65d91e..da5d868280e9671 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm32 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
index e7f4bcb9b0ff29a..5baec378876bb1e 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't convert the cmp instruction prematurely.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @print()
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
index 2384f9382685bd0..0290e42339e2ad4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @may_throw(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 38fc12815c033e7..2f27f79480ab1b4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index b777f000e33a6d3..4778e3dcaf9957b 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -59,14 +60,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 1e0fcdb87a72d30..00ee422ce5863df 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert a call before coro.suspend to a musttail call
 ; while the user of the coro.suspend is a icmpinst.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index d19606491335e50..9afc79abbe88cd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -1,6 +1,7 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
 ; to convert a resuming call to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index eea711861c488c5..9c2b1ece1624bc9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -4,6 +4,7 @@
 ; an extra bitcast instruction in the path, which makes it harder to
 ; optimize.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index c32fe9b0ee304c2..860032bd3cf8e52 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -4,6 +4,7 @@
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 

>From 11c47bf93c69c01536263a05bfacb4936f6a20fc Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 3 Nov 2023 18:33:31 -0700
Subject: [PATCH 2/2] [coro][pgo] Don't promote pgo counters in the suspend
 basic block

If a suspend happens in the resume part (this can happen in the case of
chained coroutines), and that's part of a loop, the pre-split CFG has
the suspend block as an exit of that loop. PGO Counter Promotion will
then try to commit the temporary counter to the global in that "exit"
block (it also does that in the other loop exit BBs, which also include
the "destroy" case).

We don't need to commit the counter in the suspend case - it's not
a loop exit from the perspective of the behavior of the program. The
regular loop exit, together with the "destroy" case, completely cover
any updates that may need to happen to the global counter.
---
 .../Instrumentation/InstrProfiling.cpp        |  10 +-
 ...-split-musttail-chain-pgo-counter-promo.ll | 175 ++++++++++++++++++
 2 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 55eef2b76e9be28..d4f02a3fb848de9 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -242,8 +242,16 @@ class PGOCounterPromoter {
     if (!isPromotionPossible(&L, LoopExitBlocks))
       return;
 
+    auto IsSuspendBB = [&](BasicBlock *BB) {
+      if (auto *Pred = BB->getSinglePredecessor())
+        if (auto *SW = dyn_cast<SwitchInst>(Pred->getTerminator()))
+          if (auto *Intr = dyn_cast<IntrinsicInst>(SW->getCondition()))
+            return Intr->getIntrinsicID() == Intrinsic::coro_suspend &&
+                   SW->getDefaultDest() == BB;
+      return false;
+    };
     for (BasicBlock *ExitBlock : LoopExitBlocks) {
-      if (BlockSet.insert(ExitBlock).second) {
+      if (BlockSet.insert(ExitBlock).second && !IsSuspendBB(ExitBlock)) {
         ExitBlocks.push_back(ExitBlock);
         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
       }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
new file mode 100644
index 000000000000000..ddd293eed2409e9
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail-chain-pgo-counter-promo.ll
@@ -0,0 +1,175 @@
+; REQUIRES: x86-registered-target
+; RUN: opt -passes='pgo-instr-gen,instrprof,coro-split' -do-counter-promotion=true -S < %s | FileCheck %s
+
+; CHECK-LABEL: define internal fastcc void @f.resume
+; CHECK: musttail call fastcc void 
+; CHECK-NEXT: ret void
+; CHECK: musttail call fastcc void 
+; CHECK-NEXT: ret void
+; CHECK-LABEL: define internal fastcc void @f.destroy
+target triple = "x86_64-grtev4-linux-gnu"
+
+%CoroutinePromise = type { ptr, i64, [8 x i8], ptr} 
+%Awaitable.1 = type { ptr }
+%Awaitable.2 = type { ptr, ptr }
+
+declare void @await_suspend(ptr noundef nonnull align 1 dereferenceable(1), ptr) local_unnamed_addr
+declare ptr @await_transform_await_suspend(ptr noundef nonnull align 8 dereferenceable(16), ptr) local_unnamed_addr
+declare void @destroy_frame_slowpath(ptr noundef nonnull align 16 dereferenceable(32)) local_unnamed_addr
+declare ptr @other_coro();
+declare void @heap_delete(ptr noundef, i64 noundef, i64 noundef) local_unnamed_addr
+declare noundef nonnull ptr @heap_allocate(i64 noundef, i64 noundef) local_unnamed_addr
+
+declare void @llvm.assume(i1 noundef)
+declare i64 @llvm.coro.align.i64()
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr writeonly)
+declare i1 @llvm.coro.end(ptr, i1, token)
+declare ptr @llvm.coro.free(token, ptr nocapture readonly)
+declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
+declare token @llvm.coro.save(ptr)
+declare i64 @llvm.coro.size.i64()
+declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8)
+declare i8 @llvm.coro.suspend(token, i1)
+declare void @llvm.instrprof.increment(ptr, i64, i32, i32)
+declare void @llvm.instrprof.value.profile(ptr, i64, i64, i32, i32)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: noinline nounwind presplitcoroutine uwtable
+define ptr @f(i32 %0) presplitcoroutine align 32 {
+  %2 = alloca i32, align 8
+  %3 = alloca %CoroutinePromise, align 16
+  %4 = alloca %Awaitable.1, align 8
+  %5 = alloca %Awaitable.2, align 8
+  %6 = call token @llvm.coro.id(i32 8, ptr nonnull %3, ptr nonnull @f, ptr null)
+  %7 = call i1 @llvm.coro.alloc(token %6)
+  br i1 %7, label %8, label %12
+
+8:                                                ; preds = %1
+  %9 = call i64 @llvm.coro.size.i64()
+  %10 = call i64 @llvm.coro.align.i64()
+  %11 = call noalias noundef nonnull ptr @heap_allocate(i64 noundef %9, i64 noundef %10) #27
+  call void @llvm.assume(i1 true) [ "align"(ptr %11, i64 %10) ]
+  br label %12
+
+12:                                               ; preds = %8, %1
+  %13 = phi ptr [ null, %1 ], [ %11, %8 ]
+  %14 = call ptr @llvm.coro.begin(token %6, ptr %13) #28
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) #9
+  store ptr null, ptr %3, align 16
+  %15 = getelementptr inbounds {ptr, i64}, ptr %3, i64 0, i32 1
+  store i64 0, ptr %15, align 8
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4) #9
+  store ptr %3, ptr %4, align 8
+  %16 = call token @llvm.coro.save(ptr null)
+  call void @await_suspend(ptr noundef nonnull align 1 dereferenceable(1) %4, ptr %14) #9
+  %17 = call i8 @llvm.coro.suspend(token %16, i1 false)
+  switch i8 %17, label %61 [
+    i8 0, label %18
+    i8 1, label %21
+  ]
+
+18:                                               ; preds = %12
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #9
+  %19 = icmp slt i32 0, %0
+  br i1 %19, label %20, label %36
+
+20:                                               ; preds = %18
+  br label %22
+
+21:                                               ; preds = %12
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #9
+  br label %54
+
+22:                                               ; preds = %20, %31
+  %23 = phi i32 [ 0, %20 ], [ %32, %31 ]
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %5) #9
+  %24 = call ptr @other_coro()
+  store ptr %3, ptr %5, align 8
+  %25 = getelementptr inbounds { ptr, ptr }, ptr %5, i64 0, i32 1
+  store ptr %24, ptr %25, align 8
+  %26 = call token @llvm.coro.save(ptr null)
+  %27 = call ptr @await_transform_await_suspend(ptr noundef nonnull align 8 dereferenceable(16) %5, ptr %14)
+  %28 = call ptr @llvm.coro.subfn.addr(ptr %27, i8 0)
+  %29 = ptrtoint ptr %28 to i64
+  call fastcc void %28(ptr %27) #9
+  %30 = call i8 @llvm.coro.suspend(token %26, i1 false)
+  switch i8 %30, label %60 [
+    i8 0, label %31
+    i8 1, label %34
+  ]
+
+31:                                               ; preds = %22
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %5) #9
+  %32 = add nuw nsw i32 %23, 1
+  %33 = icmp slt i32 %32, %0
+  br i1 %33, label %22, label %35, !llvm.loop !0
+
+34:                                               ; preds = %22
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %5) #9
+  br label %54
+
+35:                                               ; preds = %31
+  br label %36
+
+36:                                               ; preds = %35, %18
+  %37 = call token @llvm.coro.save(ptr null)
+  %38 = getelementptr inbounds i8, ptr %14, i64 16
+  %39 = getelementptr inbounds i8, ptr %14, i64 32
+  %40 = load i64, ptr %39, align 8
+  %41 = load ptr, ptr %38, align 16
+  %42 = icmp eq ptr %41, null
+  br i1 %42, label %43, label %46
+
+43:                                               ; preds = %36
+  %44 = call ptr @llvm.coro.subfn.addr(ptr nonnull %14, i8 1)
+  %45 = ptrtoint ptr %44 to i64
+  call fastcc void %44(ptr nonnull %14) #9
+  br label %47
+
+46:                                               ; preds = %36
+  call void @destroy_frame_slowpath(ptr noundef nonnull align 16 dereferenceable(32) %38) #9
+  br label %47
+
+47:                                               ; preds = %43, %46
+  %48 = inttoptr i64 %40 to ptr
+  %49 = call ptr @llvm.coro.subfn.addr(ptr %48, i8 0)
+  %50 = ptrtoint ptr %49 to i64
+  call fastcc void %49(ptr %48) #9
+  %51 = call i8 @llvm.coro.suspend(token %37, i1 true) #28
+  switch i8 %51, label %61 [
+    i8 0, label %53
+    i8 1, label %52
+  ]
+
+52:                                               ; preds = %47
+  br label %54
+
+53:                                               ; preds = %47
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2) #9
+  unreachable
+
+54:                                               ; preds = %52, %34, %21
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) #9
+  %55 = call ptr @llvm.coro.free(token %6, ptr %14)
+  %56 = icmp eq ptr %55, null
+  br i1 %56, label %61, label %57
+
+57:                                               ; preds = %54
+  %58 = call i64 @llvm.coro.size.i64()
+  %59 = call i64 @llvm.coro.align.i64()
+  call void @heap_delete(ptr noundef nonnull %55, i64 noundef %58, i64 noundef %59) #9
+  br label %61
+
+60:                                               ; preds = %22
+  br label %61
+
+61:                                               ; preds = %60, %57, %54, %47, %12
+  %62 = getelementptr inbounds i8, ptr %3, i64 -16
+  %63 = call i1 @llvm.coro.end(ptr null, i1 false, token none) #28
+  ret ptr %62
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}



More information about the llvm-commits mailing list