[llvm] [coro][pgo] Don't promote pgo counters in the suspend basic block (PR #71263)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 3 18:47:51 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Mircea Trofin (mtrofin)

<details>
<summary>Changes</summary>

This depends on PR #<!-- -->71262 

If a suspend happens in the resume part (this can happen in the case of
chained coroutines), and that's part of a loop, the pre-split CFG has
the suspend block as an exit of that loop. PGO Counter Promotion will
then try to commit the temporary counter to the global in that "exit"
block (it also does that in the other loop exit BBs, which also includes
the "destroy" case).

We don't need to commit the counter in the suspend case - it's not
a loop exit from the perspective of the behavior of the program. The
regular loop exit, together with the "destroy" case, completely cover
any updates that may need to happen to the global counter.

---
Full diff: https://github.com/llvm/llvm-project/pull/71263.diff


14 Files Affected:

- (modified) llvm/include/llvm/Transforms/Instrumentation/CFGMST.h (+54-13) 
- (modified) llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp (+9-1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail.ll (+5-2) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail1.ll (+8-4) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail10.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail11.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail12.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail13.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail2.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail3.ll (+8-4) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail4.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail5.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail6.ll (+1) 
- (modified) llvm/test/Transforms/Coroutines/coro-split-musttail7.ll (+1) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 6ed8a6c6eaf0197..eddfbd8a8e45b7a 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST {
 
     static const uint32_t CriticalEdgeMultiplier = 1000;
 
+    auto GetCoroSuspendSwitch =
+        [&](const Instruction *TI) -> const SwitchInst * {
+      if (!F.isPresplitCoroutine())
+        return nullptr;
+      if (auto *SWInst = dyn_cast<SwitchInst>(TI))
+        if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
+          if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
+            return SWInst;
+      return nullptr;
+    };
+
     for (BasicBlock &BB : F) {
       Instruction *TI = BB.getTerminator();
+      const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
       uint64_t BBWeight =
           (BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
       uint64_t Weight = 2;
       if (int successors = TI->getNumSuccessors()) {
         for (int i = 0; i != successors; ++i) {
           BasicBlock *TargetBB = TI->getSuccessor(i);
-          bool Critical = isCriticalEdge(TI, i);
-          uint64_t scaleFactor = BBWeight;
-          if (Critical) {
-            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
-              scaleFactor *= CriticalEdgeMultiplier;
-            else
-              scaleFactor = UINT64_MAX;
+          const bool Critical = isCriticalEdge(TI, i);
+          const bool IsCoroSuspendTarget =
+              CoroSuspendSwitch &&
+              CoroSuspendSwitch->getDefaultDest() == TargetBB;
+          // We must not add instrumentation to the BB representing the
+          // "suspend" path, else CoroSplit won't be able to lower
+          // llvm.coro.suspend to a tail call. We do want profiling info for
+          // the other branches (resume/destroy). So we do 2 things:
+          // 1. we prefer instrumenting those other edges by setting the weight
+          //    of the "suspend" edge to max, and
+          // 2. we mark the edge as "Removed" to guarantee it is not considered
+          //    for instrumentation. That could technically happen:
+          //    (from test/Transforms/Coroutines/coro-split-musttail.ll)
+          //
+          // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+          // switch i8 %suspend, label %exit [
+          //   i8 0, label %await.ready
+          //   i8 1, label %exit
+          // ]
+          if (IsCoroSuspendTarget) {
+            Weight = UINT64_MAX;
+          } else {
+            bool Critical = isCriticalEdge(TI, i);
+            uint64_t scaleFactor = BBWeight;
+            if (Critical) {
+              if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+                scaleFactor *= CriticalEdgeMultiplier;
+              else
+                scaleFactor = UINT64_MAX;
+            }
+            if (BPI != nullptr)
+              Weight =
+                  BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+            if (Weight == 0)
+              Weight++;
           }
-          if (BPI != nullptr)
-            Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
-          if (Weight == 0)
-            Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
           E->IsCritical = Critical;
+          // See comment above - we must guarantee the coro suspend BB isn't
+          // instrumented.
+          if (IsCoroSuspendTarget)
+            E->Removed = true;
           LLVM_DEBUG(dbgs() << "  Edge: from " << BB.getName() << " to "
-                            << TargetBB->getName() << "  w=" << Weight << "\n");
-
+                          << TargetBB->getName() << "  w=" << Weight << "\n");          
           // Keep track of entry/exit edges:
           if (&BB == Entry) {
             if (Weight > MaxEntryOutWeight) {
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 55eef2b76e9be28..d4f02a3fb848de9 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -242,8 +242,16 @@ class PGOCounterPromoter {
     if (!isPromotionPossible(&L, LoopExitBlocks))
       return;
 
+    auto IsSuspendBB = [&](BasicBlock *BB) {
+      if (auto *Pred = BB->getSinglePredecessor())
+        if (auto *SW = dyn_cast<SwitchInst>(Pred->getTerminator()))
+          if (auto *Intr = dyn_cast<IntrinsicInst>(SW->getCondition()))
+            return Intr->getIntrinsicID() == Intrinsic::coro_suspend &&
+                   SW->getDefaultDest() == BB;
+      return false;
+    };
     for (BasicBlock *ExitBlock : LoopExitBlocks) {
-      if (BlockSet.insert(ExitBlock).second) {
+      if (BlockSet.insert(ExitBlock).second && !IsSuspendBB(ExitBlock)) {
         ExitBlocks.push_back(ExitBlock);
         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
       }
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
index 0406135687904bf..825e44471db27ae 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -40,7 +41,9 @@ exit:
 ; Verify that in the resume part resume call is marked with musttail.
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
+; PGO: call void @llvm.instrprof
+; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
 ; CHECK-NEXT: ret void
 
 declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
index cd1635b93d2cc24..d0d11fc4495e480 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -63,14 +64,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
index 9d73c8bbc57b81a..cdd58b2a084fcd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm64 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm64-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
index 9bc5b4f0c65d91e..da5d868280e9671 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
@@ -1,6 +1,7 @@
 ; Tests that we would convert coro.resume to a musttail call if the target is
 ; Wasm32 with tail-call support.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
index e7f4bcb9b0ff29a..5baec378876bb1e 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't convert the cmp instruction prematurely.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @print()
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
index 2384f9382685bd0..0290e42339e2ad4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
@@ -1,5 +1,6 @@
 ; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr)
 declare void @may_throw(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
index 38fc12815c033e7..2f27f79480ab1b4 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
index b777f000e33a6d3..4778e3dcaf9957b 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert coro.resume followed by a suspend to a
 ; musttail call.
-; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
 
 define void @f() #0 {
 entry:
@@ -59,14 +60,17 @@ unreach:
 ; CHECK-LABEL: @f.resume(
 ; CHECK: %[[hdl:.+]] = call ptr @g()
 ; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
+; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[hdl2:.+]] = call ptr @h()
 ; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
+; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
 ; CHECK-NEXT: ret void
 ; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
-; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
+; PGO: musttail call fastcc void %[[addr4]](ptr null)
 ; CHECK-NEXT: ret void
 
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
index 1e0fcdb87a72d30..00ee422ce5863df 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
@@ -1,6 +1,7 @@
 ; Tests that coro-split will convert a call before coro.suspend to a musttail call
 ; while the user of the coro.suspend is a icmpinst.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 define void @fakeresume1(ptr)  {
 entry:
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
index d19606491335e50..9afc79abbe88cd8 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
@@ -1,6 +1,7 @@
 ; Tests that sinked lifetime markers wouldn't provent optimization
 ; to convert a resuming call to a musttail call.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
index eea711861c488c5..9c2b1ece1624bc9 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
@@ -4,6 +4,7 @@
 ; an extra bitcast instruction in the path, which makes it harder to
 ; optimize.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 
diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
index c32fe9b0ee304c2..860032bd3cf8e52 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
@@ -4,6 +4,7 @@
 ; is that this contains dead instruction generated during the transformation,
 ; which makes the optimization harder.
 ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
 
 declare void @fakeresume1(ptr align 8)
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/71263


More information about the llvm-commits mailing list