[llvm] 51aceb5 - [llvm/llvm-project][Coroutines] Improve debugging and minor refactoring (#104642)

Tue Aug 27 15:55:36 PDT 2024

Author: Tyler Nowicki
Date: 2024-08-27T18:55:33-04:00
New Revision: 51aceb5b30ff9934c476eeb5cd7ff584c3e27049

URL: https://github.com/llvm/llvm-project/commit/51aceb5b30ff9934c476eeb5cd7ff584c3e27049
DIFF: https://github.com/llvm/llvm-project/commit/51aceb5b30ff9934c476eeb5cd7ff584c3e27049.diff

LOG: [llvm/llvm-project][Coroutines] Improve debugging and minor refactoring (#104642)

No Functional Changes

* Fix comments in several places
* Instead of using BB-getName() (in dump methods) use
getBasicBlockLabel. This fixes the poor output of the dumped info that
resulted in missing BB labels.
* Use RPO when dumping SuspendCrossingInfo. Without this the dump order
is determined by the ptr addresses and so is not consistent from run to
run making IR diffs difficult to read.
* Inference -> Interference
* Pull the logic that determines insertion location out of insertSpills
and into getSpillInsertionPt, to differentiate between these two
operations.
* Use Shape getters for CoroId instead of getting it manually.

---------

Co-authored-by: tnowicki <tnowicki.nowicki at amd.com>

Added: 
    

Modified: 
    llvm/lib/Transforms/Coroutines/CoroEarly.cpp
    llvm/lib/Transforms/Coroutines/CoroFrame.cpp
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index d8e827e9cebcfe..13b6680264c87c 100644

--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -203,7 +203,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
         if (auto *CII = cast<CoroIdInst>(&I)) {
           if (CII->getInfo().isPreSplit()) {
             assert(F.isPresplitCoroutine() &&
-                   "The frontend uses Swtich-Resumed ABI should emit "
+                   "The frontend uses Switch-Resumed ABI should emit "
                    "\"presplitcoroutine\" attribute for the coroutine.");
             setCannotDuplicate(CII);
             CII->setCoroutineSelf();

diff  --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 996cc142f3c00c..740ae661359445 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -6,7 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 // This file contains classes used to discover if for a particular value
-// there from sue to definition that crosses a suspend block.
+// its definition precedes and its uses follow a suspend block. This is
+// referred to as a suspend crossing value.
 //
 // Using the information discovered we form a Coroutine Frame structure to
 // contain those values. All uses of those values are replaced with appropriate
@@ -124,7 +125,8 @@ class SuspendCrossingInfo {
 public:
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void dump() const;
-  void dump(StringRef Label, BitVector const &BV) const;
+  void dump(StringRef Label, BitVector const &BV,
+            const ReversePostOrderTraversal<Function *> &RPOT) const;
 #endif
 
   SuspendCrossingInfo(Function &F, coro::Shape &Shape);
@@ -207,21 +209,41 @@ class SuspendCrossingInfo {
 } // end anonymous namespace
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
-                                                BitVector const &BV) const {
+static std::string getBasicBlockLabel(const BasicBlock *BB) {
+  if (BB->hasName())
+    return BB->getName().str();
+
+  std::string S;
+  raw_string_ostream OS(S);
+  BB->printAsOperand(OS, false);
+  return OS.str().substr(1);
+}
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(
+    StringRef Label, BitVector const &BV,
+    const ReversePostOrderTraversal<Function *> &RPOT) const {
   dbgs() << Label << ":";
-  for (size_t I = 0, N = BV.size(); I < N; ++I)
-    if (BV[I])
-      dbgs() << " " << Mapping.indexToBlock(I)->getName();
+  for (const BasicBlock *BB : RPOT) {
+    auto BBNo = Mapping.blockToIndex(BB);
+    if (BV[BBNo])
+      dbgs() << " " << getBasicBlockLabel(BB);
+  }
   dbgs() << "\n";
 }
 
 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
-  for (size_t I = 0, N = Block.size(); I < N; ++I) {
-    BasicBlock *const B = Mapping.indexToBlock(I);
-    dbgs() << B->getName() << ":\n";
-    dump("   Consumes", Block[I].Consumes);
-    dump("      Kills", Block[I].Kills);
+  if (Block.empty())
+    return;
+
+  BasicBlock *const B = Mapping.indexToBlock(0);
+  Function *F = B->getParent();
+
+  ReversePostOrderTraversal<Function *> RPOT(F);
+  for (const BasicBlock *BB : RPOT) {
+    auto BBNo = Mapping.blockToIndex(BB);
+    dbgs() << getBasicBlockLabel(BB) << ":\n";
+    dump("   Consumes", Block[BBNo].Consumes, RPOT);
+    dump("      Kills", Block[BBNo].Kills, RPOT);
   }
   dbgs() << "\n";
 }
@@ -418,10 +440,7 @@ struct RematGraph {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   void dump() const {
     dbgs() << "Entry (";
-    if (EntryNode->Node->getParent()->hasName())
-      dbgs() << EntryNode->Node->getParent()->getName();
-    else
-      EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
+    dbgs() << getBasicBlockLabel(EntryNode->Node->getParent());
     dbgs() << ") : " << *EntryNode->Node << "\n";
     for (auto &E : Remats) {
       dbgs() << *(E.first) << "\n";
@@ -551,7 +570,7 @@ struct FrameDataInfo {
 
 #ifndef NDEBUG
 static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
-  dbgs() << "------------- " << Title << "--------------\n";
+  dbgs() << "------------- " << Title << " --------------\n";
   for (const auto &E : Spills) {
     E.first->dump();
     dbgs() << "   user: ";
@@ -813,7 +832,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
   StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                       StackLifetime::LivenessType::May);
   StackLifetimeAnalyzer.run();
-  auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
+  auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
     return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
         StackLifetimeAnalyzer.getLiveRange(AI2));
   };
@@ -833,13 +852,13 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
   for (const auto &A : FrameData.Allocas) {
     AllocaInst *Alloca = A.Alloca;
     bool Merged = false;
-    // Try to find if the Alloca is not inferenced with any existing
+    // Try to find if the Alloca does not interfere with any existing
     // NonOverlappedAllocaSet. If it is true, insert the alloca to that
     // NonOverlappedAllocaSet.
     for (auto &AllocaSet : NonOverlapedAllocas) {
       assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
-      bool NoInference = none_of(AllocaSet, [&](auto Iter) {
-        return IsAllocaInferenre(Alloca, Iter);
+      bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
+        return DoAllocasInterfere(Alloca, Iter);
       });
       // If the alignment of A is multiple of the alignment of B, the address
       // of A should satisfy the requirement for aligning for B.
@@ -852,7 +871,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
         return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
                0;
       }();
-      bool CouldMerge = NoInference && Alignable;
+      bool CouldMerge = NoInterference && Alignable;
       if (!CouldMerge)
         continue;
       AllocaSet.push_back(Alloca);
@@ -1714,6 +1733,51 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
   return CleanupRet;
 }
 
+static BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape,
+                                                Value *Def,
+                                                const DominatorTree &DT) {
+  BasicBlock::iterator InsertPt;
+  if (auto *Arg = dyn_cast<Argument>(Def)) {
+    // For arguments, we will place the store instruction right after
+    // the coroutine frame pointer instruction, i.e. coro.begin.
+    InsertPt = Shape.getInsertPtAfterFramePtr();
+
+    // If we're spilling an Argument, make sure we clear 'nocapture'
+    // from the coroutine function.
+    Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
+  } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
+    // Don't spill immediately after a suspend; splitting assumes
+    // that the suspend will be followed by a branch.
+    InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
+  } else {
+    auto *I = cast<Instruction>(Def);
+    if (!DT.dominates(Shape.CoroBegin, I)) {
+      // If it is not dominated by CoroBegin, then spill should be
+      // inserted immediately after CoroFrame is computed.
+      InsertPt = Shape.getInsertPtAfterFramePtr();
+    } else if (auto *II = dyn_cast<InvokeInst>(I)) {
+      // If we are spilling the result of the invoke instruction, split
+      // the normal edge and insert the spill in the new block.
+      auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
+      InsertPt = NewBB->getTerminator()->getIterator();
+    } else if (isa<PHINode>(I)) {
+      // Skip the PHINodes and EH pads instructions.
+      BasicBlock *DefBlock = I->getParent();
+      if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
+        InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
+      else
+        InsertPt = DefBlock->getFirstInsertionPt();
+    } else {
+      assert(!I->isTerminator() && "unexpected terminator");
+      // For all other values, the spill is placed immediately after
+      // the definition.
+      InsertPt = I->getNextNode()->getIterator();
+    }
+  }
+
+  return InsertPt;
+}
+
 // Replace all alloca and SSA values that are accessed across suspend points
 // with GetElementPointer from coroutine frame + loads and stores. Create an
 // AllocaSpillBB that will become the new entry block for the resume parts of
@@ -1736,9 +1800,8 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
 //
 //
 static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
-  auto *CB = Shape.CoroBegin;
-  LLVMContext &C = CB->getContext();
-  Function *F = CB->getFunction();
+  LLVMContext &C = Shape.CoroBegin->getContext();
+  Function *F = Shape.CoroBegin->getFunction();
   IRBuilder<> Builder(C);
   StructType *FrameTy = Shape.FrameTy;
   Value *FramePtr = Shape.FramePtr;
@@ -1799,47 +1862,16 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
     auto SpillAlignment = Align(FrameData.getAlign(Def));
     // Create a store instruction storing the value into the
     // coroutine frame.
-    BasicBlock::iterator InsertPt;
+    BasicBlock::iterator InsertPt = getSpillInsertionPt(Shape, Def, DT);
+
     Type *ByValTy = nullptr;
     if (auto *Arg = dyn_cast<Argument>(Def)) {
-      // For arguments, we will place the store instruction right after
-      // the coroutine frame pointer instruction, i.e. coro.begin.
-      InsertPt = Shape.getInsertPtAfterFramePtr();
-
       // If we're spilling an Argument, make sure we clear 'nocapture'
       // from the coroutine function.
       Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
 
       if (Arg->hasByValAttr())
         ByValTy = Arg->getParamByValType();
-    } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
-      // Don't spill immediately after a suspend; splitting assumes
-      // that the suspend will be followed by a branch.
-      InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
-    } else {
-      auto *I = cast<Instruction>(Def);
-      if (!DT.dominates(CB, I)) {
-        // If it is not dominated by CoroBegin, then spill should be
-        // inserted immediately after CoroFrame is computed.
-        InsertPt = Shape.getInsertPtAfterFramePtr();
-      } else if (auto *II = dyn_cast<InvokeInst>(I)) {
-        // If we are spilling the result of the invoke instruction, split
-        // the normal edge and insert the spill in the new block.
-        auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
-        InsertPt = NewBB->getTerminator()->getIterator();
-      } else if (isa<PHINode>(I)) {
-        // Skip the PHINodes and EH pads instructions.
-        BasicBlock *DefBlock = I->getParent();
-        if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
-          InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
-        else
-          InsertPt = DefBlock->getFirstInsertionPt();
-      } else {
-        assert(!I->isTerminator() && "unexpected terminator");
-        // For all other values, the spill is placed immediately after
-        // the definition.
-        InsertPt = I->getNextNode()->getIterator();
-      }
     }
 
     auto Index = FrameData.getFieldIndex(Def);
@@ -1982,7 +2014,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
     UsersToUpdate.clear();
     for (User *U : Alloca->users()) {
       auto *I = cast<Instruction>(U);
-      if (DT.dominates(CB, I))
+      if (DT.dominates(Shape.CoroBegin, I))
         UsersToUpdate.push_back(I);
     }
     if (UsersToUpdate.empty())
@@ -2024,7 +2056,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
       Builder.CreateStore(Value, G);
     }
     // For each alias to Alloca created before CoroBegin but used after
-    // CoroBegin, we recreate them after CoroBegin by appplying the offset
+    // CoroBegin, we recreate them after CoroBegin by applying the offset
     // to the pointer in the frame.
     for (const auto &Alias : A.Aliases) {
       auto *FramePtr = GetFramePointer(Alloca);
@@ -2033,7 +2065,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
       auto *AliasPtr =
           Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
       Alias.first->replaceUsesWithIf(
-          AliasPtr, [&](Use &U) { return DT.dominates(CB, U); });
+          AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
     }
   }
 
@@ -2046,7 +2078,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
     // If there is memory accessing to promise alloca before CoroBegin;
     bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
       auto *Inst = dyn_cast<Instruction>(U.getUser());
-      if (!Inst || DT.dominates(CB, Inst))
+      if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
         return false;
 
       if (auto *CI = dyn_cast<CallInst>(Inst)) {
@@ -2692,7 +2724,7 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
   }
 }
 
-/// retcon and retcon.once conventions assume that all spill uses can be sunk
+/// Async and Retcon{Once} conventions assume that all spill uses can be sunk
 /// after the coro.begin intrinsic.
 static void sinkSpillUsesAfterCoroBegin(Function &F,
                                         const FrameDataInfo &FrameData,
@@ -2728,7 +2760,7 @@ static void sinkSpillUsesAfterCoroBegin(Function &F,
   // Sort by dominance.
   SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
   llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
-    // If a dominates b it should preceed (<) b.
+    // If a dominates b it should precede (<) b.
     return Dom.dominates(A, B);
   });
 
@@ -3126,7 +3158,7 @@ void coro::buildCoroutineFrame(
   cleanupSinglePredPHIs(F);
 
   // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
-  // never has its definition separated from the PHI by the suspend point.
+  // never have its definition separated from the PHI by the suspend point.
   rewritePHIs(F);
 
   // Build suspend crossing info.
@@ -3223,6 +3255,7 @@ void coro::buildCoroutineFrame(
   Shape.FramePtr = Shape.CoroBegin;
   // For now, this works for C++ programs only.
   buildFrameDebugInfo(F, Shape, FrameData);
+  // Insert spills and reloads
   insertSpills(FrameData, Shape);
   lowerLocalAllocas(LocalAllocas, DeadInstructions);
 

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 8eceaef59a1e1f..fab387c65ab41f 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1221,11 +1221,10 @@ static void postSplitCleanup(Function &F) {
 // frame if possible.
 static void handleNoSuspendCoroutine(coro::Shape &Shape) {
   auto *CoroBegin = Shape.CoroBegin;
-  auto *CoroId = CoroBegin->getId();
-  auto *AllocInst = CoroId->getCoroAlloc();
   switch (Shape.ABI) {
   case coro::ABI::Switch: {
-    auto SwitchId = cast<CoroIdInst>(CoroId);
+    auto SwitchId = Shape.getSwitchCoroId();
+    auto *AllocInst = SwitchId->getCoroAlloc();
     coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
     if (AllocInst) {
       IRBuilder<> Builder(AllocInst);
@@ -1689,7 +1688,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
   auto &Context = F.getContext();
   auto *Int8PtrTy = PointerType::getUnqual(Context);
 
-  auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
+  auto *Id = Shape.getAsyncCoroId();
   IRBuilder<> Builder(Id);
 
   auto *FramePtr = Id->getStorage();
@@ -1783,7 +1782,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
   F.removeRetAttr(Attribute::NonNull);
 
   // Allocate the frame.
-  auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
+  auto *Id = Shape.getRetconCoroId();
   Value *RawFramePtr;
   if (Shape.RetconLowering.IsFrameInlineInStorage) {
     RawFramePtr = Id->getStorage();