[llvm] [coro][pgo] Do not insert counters in the `suspend` block (PR #71262)
    Mircea Trofin via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Nov 13 10:48:03 PST 2023
    
    
  
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/71262
>From 184936c339ea73ccfc4349e023ff165aa9f8392e Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Fri, 3 Nov 2023 18:19:15 -0700
Subject: [PATCH 1/2] [coro][pgo] Do not insert counters in the `suspend` block
If we do, we can't lower the suspend call to a tail call. If this
happens in a loop, it can lead to stack overflow (this was encountered
in a benchmark, as an extreme case).
We can instrument the other 2 edges instead, as long as they also don't
point to the same basic block.
---
 .../llvm/Transforms/Instrumentation/CFGMST.h  | 65 +++++++++++++++----
 .../Coroutines/coro-split-musttail.ll         |  7 +-
 .../Coroutines/coro-split-musttail1.ll        | 12 ++--
 .../Coroutines/coro-split-musttail10.ll       |  1 +
 .../Coroutines/coro-split-musttail11.ll       |  1 +
 .../Coroutines/coro-split-musttail12.ll       |  1 +
 .../Coroutines/coro-split-musttail13.ll       |  1 +
 .../Coroutines/coro-split-musttail2.ll        |  1 +
 .../Coroutines/coro-split-musttail3.ll        | 12 ++--
 .../Coroutines/coro-split-musttail4.ll        |  1 +
 .../Coroutines/coro-split-musttail5.ll        |  1 +
 .../Coroutines/coro-split-musttail6.ll        |  1 +
 .../Coroutines/coro-split-musttail7.ll        |  1 +
 13 files changed, 83 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 6ed8a6c6eaf0197..1c5b7ba6d0ed364 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST {
 
     static const uint32_t CriticalEdgeMultiplier = 1000;
 
+    auto GetCoroSuspendSwitch =
+        [&](const Instruction *TI) -> const SwitchInst * {
+      if (!F.isPresplitCoroutine())
+        return nullptr;
+      if (auto *SWInst = dyn_cast<SwitchInst>(TI))
+        if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
+          if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
+            return SWInst;
+      return nullptr;
+    };
+
     for (BasicBlock &BB : F) {
       Instruction *TI = BB.getTerminator();
+      const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
       uint64_t BBWeight =
           (BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
       uint64_t Weight = 2;
       if (int successors = TI->getNumSuccessors()) {
         for (int i = 0; i != successors; ++i) {
           BasicBlock *TargetBB = TI->getSuccessor(i);
-          bool Critical = isCriticalEdge(TI, i);
-          uint64_t scaleFactor = BBWeight;
-          if (Critical) {
-            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
-              scaleFactor *= CriticalEdgeMultiplier;
-            else
-              scaleFactor = UINT64_MAX;
+          const bool Critical = isCriticalEdge(TI, i);
+          const bool IsCoroSuspendTarget =
+              CoroSuspendSwitch &&
+              CoroSuspendSwitch->getDefaultDest() == TargetBB;
+          // We must not add instrumentation to the BB representing the
+          // "suspend" path, else CoroSplit won't be able to lower
+          // llvm.coro.suspend to a tail call. We do want profiling info for
+          // the other branches (resume/destroy). So we do 2 things:
+          // 1. we prefer instrumenting those other edges by setting the weight
+          //    of the "suspend" edge to max, and
+          // 2. we mark the edge as "Removed" to guarantee it is not considered
+          //    for instrumentation. That could technically happen:
+          //    (from test/Transforms/Coroutines/coro-split-musttail.ll)
+          //
+          // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+          // switch i8 %suspend, label %exit [
+          //   i8 0, label %await.ready
+          //   i8 1, label %exit
+          // ]
+          if (IsCoroSuspendTarget) {
+            Weight = UINT64_MAX;
+          } else {
+            bool Critical = isCriticalEdge(TI, i);
+            uint64_t scaleFactor = BBWeight;
+            if (Critical) {
+              if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+                scaleFactor *= CriticalEdgeMultiplier;
+              else
+                scaleFactor = UINT64_MAX;
+            }
+            if (BPI != nullptr)
+              Weight =
+                  BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+            if (Weight == 0)
+              Weight++;
           }
-          if (BPI != nullptr)
-            Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
-          if (Weight == 0)
-            Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
           E->IsCritical = Critical;
+          // See comment above - we must guarantee the coro suspend BB isn't
+          // instrumented.
+          if (IsCoroSuspendTarget)
+            E->Removed = true;
           LLVM_DEBUG(dbgs() << "  Edge: from " << BB.getName() << " to "
                             << TargetBB->getName() << "  w=" << Weight << "\n");
-
           // Keep track of entry/exit edges:
           if (&BB == Entry) {
             if (Weight > MaxEntryOutWeight) {
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 0406135687904bf..825e44471db27ae 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -40,7 +41,9 @@ exit:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; PGO: call void @llvm.instrprof
+; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index cd1635b93d2cc24..d0d11fc4495e480 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -63,14 +64,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index 9d73c8bbc57b81a..cdd58b2a084fcd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm64 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm64-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
index 9bc5b4f0c65d91e..da5d868280e9671 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm32 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
index e7f4bcb9b0ff29a..5baec378876bb1e 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't convert the cmp instruction prematurely.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @print()
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
index 2384f9382685bd0..0290e42339e2ad4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @may_throw(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 38fc12815c033e7..2f27f79480ab1b4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index b777f000e33a6d3..4778e3dcaf9957b 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -59,14 +60,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 1e0fcdb87a72d30..00ee422ce5863df 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert a call before coro.suspend to a musttail call
 ; while the user of the coro.suspend is a icmpinst.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index d19606491335e50..9afc79abbe88cd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -1,6 +1,7 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
 ; to convert a resuming call to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index eea711861c488c5..9c2b1ece1624bc9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -4,6 +4,7 @@
 ; an extra bitcast instruction in the path, which makes it harder to
 ; optimize.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index c32fe9b0ee304c2..860032bd3cf8e52 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -4,6 +4,7 @@
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
>From d77e267674da1dae85ff64fe252e0384af84da61 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 13 Nov 2023 10:47:28 -0800
Subject: [PATCH 2/2] Factored suspend case out.
---
 .../llvm/Transforms/Instrumentation/CFGMST.h  | 97 +++++++++----------
 1 file changed, 46 insertions(+), 51 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 1c5b7ba6d0ed364..33d4019eed9b139 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -94,6 +94,39 @@ template <class Edge, class BBInfo> class CFGMST {
     return It->second.get();
   }
 
+  void handleCoroSuspendEdge(Edge *E) {
+    // We must not add instrumentation to the BB representing the
+    // "suspend" path, else CoroSplit won't be able to lower
+    // llvm.coro.suspend to a tail call. We do want profiling info for
+    // the other branches (resume/destroy). So we do 2 things:
+    // 1. we prefer instrumenting those other edges by setting the weight
+    //    of the "suspend" edge to max, and
+    // 2. we mark the edge as "Removed" to guarantee it is not considered
+    //    for instrumentation. That could technically happen:
+    //    (from test/Transforms/Coroutines/coro-split-musttail.ll)
+    //
+    // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+    // switch i8 %suspend, label %exit [
+    //   i8 0, label %await.ready
+    //   i8 1, label %exit
+    // ]
+    const BasicBlock *EdgeTarget = E->DestBB;
+    if (!EdgeTarget)
+      return;
+    assert(E->SrcBB);
+    const Function *F = EdgeTarget->getParent();
+    if (!F->isPresplitCoroutine())
+      return;
+
+    const Instruction *TI = E->SrcBB->getTerminator();
+    if (auto *SWInst = dyn_cast<SwitchInst>(TI))
+      if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
+        if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend &&
+            SWInst->getDefaultDest() == EdgeTarget) {
+          E->Weight = UINT64_MAX;
+          E->Removed = true;
+        }
+  }
   // Traverse the CFG using a stack. Find all the edges and assign the weight.
   // Edges with large weight will be put into MST first so they are less likely
   // to be instrumented.
@@ -123,70 +156,32 @@ template <class Edge, class BBInfo> class CFGMST {
 
     static const uint32_t CriticalEdgeMultiplier = 1000;
 
-    auto GetCoroSuspendSwitch =
-        [&](const Instruction *TI) -> const SwitchInst * {
-      if (!F.isPresplitCoroutine())
-        return nullptr;
-      if (auto *SWInst = dyn_cast<SwitchInst>(TI))
-        if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
-          if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
-            return SWInst;
-      return nullptr;
-    };
-
     for (BasicBlock &BB : F) {
       Instruction *TI = BB.getTerminator();
-      const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
       uint64_t BBWeight =
           (BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
       uint64_t Weight = 2;
       if (int successors = TI->getNumSuccessors()) {
         for (int i = 0; i != successors; ++i) {
           BasicBlock *TargetBB = TI->getSuccessor(i);
-          const bool Critical = isCriticalEdge(TI, i);
-          const bool IsCoroSuspendTarget =
-              CoroSuspendSwitch &&
-              CoroSuspendSwitch->getDefaultDest() == TargetBB;
-          // We must not add instrumentation to the BB representing the
-          // "suspend" path, else CoroSplit won't be able to lower
-          // llvm.coro.suspend to a tail call. We do want profiling info for
-          // the other branches (resume/destroy). So we do 2 things:
-          // 1. we prefer instrumenting those other edges by setting the weight
-          //    of the "suspend" edge to max, and
-          // 2. we mark the edge as "Removed" to guarantee it is not considered
-          //    for instrumentation. That could technically happen:
-          //    (from test/Transforms/Coroutines/coro-split-musttail.ll)
-          //
-          // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
-          // switch i8 %suspend, label %exit [
-          //   i8 0, label %await.ready
-          //   i8 1, label %exit
-          // ]
-          if (IsCoroSuspendTarget) {
-            Weight = UINT64_MAX;
-          } else {
-            bool Critical = isCriticalEdge(TI, i);
-            uint64_t scaleFactor = BBWeight;
-            if (Critical) {
-              if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
-                scaleFactor *= CriticalEdgeMultiplier;
-              else
-                scaleFactor = UINT64_MAX;
-            }
-            if (BPI != nullptr)
-              Weight =
-                  BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
-            if (Weight == 0)
-              Weight++;
+          bool Critical = isCriticalEdge(TI, i);
+          uint64_t scaleFactor = BBWeight;
+          if (Critical) {
+            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+              scaleFactor *= CriticalEdgeMultiplier;
+            else
+              scaleFactor = UINT64_MAX;
           }
+          if (BPI != nullptr)
+            Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+          if (Weight == 0)
+            Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
           E->IsCritical = Critical;
-          // See comment above - we must guarantee the coro suspend BB isn't
-          // instrumented.
-          if (IsCoroSuspendTarget)
-            E->Removed = true;
+          handleCoroSuspendEdge(E);
           LLVM_DEBUG(dbgs() << "  Edge: from " << BB.getName() << " to "
                             << TargetBB->getName() << "  w=" << Weight << "\n");
+
           // Keep track of entry/exit edges:
           if (&BB == Entry) {
             if (Weight > MaxEntryOutWeight) {
    
    
More information about the llvm-commits
mailing list