[llvm] r279609 - [Coroutines] Part 8: Coroutine Frame Building algorithm

Tue Aug 23 21:44:36 PDT 2016

Author: gornishanov
Date: Tue Aug 23 23:44:35 2016
New Revision: 279609

URL: http://llvm.org/viewvc/llvm-project?rev=279609&view=rev
Log:
[Coroutines] Part 8: Coroutine Frame Building algorithm

Summary:
This patch adds the coroutine frame building algorithm. Now simple coroutines such as ex0.ll and ex1.ll (the first examples from docs\Coroutines.rst) can be compiled.

Documentation and an overview are here: http://llvm.org/docs/Coroutines.html.

Upstreaming sequence (rough plan)
1. Add documentation. (https://reviews.llvm.org/D22603)
2. Add coroutine intrinsics. (https://reviews.llvm.org/D22659)
...

7. Split coroutine into subfunctions. (https://reviews.llvm.org/D23461)
8. Coroutine Frame Building algorithm  <= we are here
9. Add f.cleanup subfunction.
10+. The rest of the logic

Reviewers: majnemer

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D23586

Added:
    llvm/trunk/test/Transforms/Coroutines/ex0.ll
    llvm/trunk/test/Transforms/Coroutines/ex1.ll
Modified:
    llvm/trunk/lib/Transforms/Coroutines/CoroFrame.cpp
    llvm/trunk/test/Transforms/Coroutines/smoketest.ll

Modified: llvm/trunk/lib/Transforms/Coroutines/CoroFrame.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Coroutines/CoroFrame.cpp?rev=279609&r1=279608&r2=279609&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Coroutines/CoroFrame.cpp (original)
+++ llvm/trunk/lib/Transforms/Coroutines/CoroFrame.cpp Tue Aug 23 23:44:35 2016
@@ -18,12 +18,275 @@
 //===----------------------------------------------------------------------===//
 
 #include "CoroInternal.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
-// TODO: Implement in future patches.
-struct SpillInfo {};
+// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
+// "coro-frame", which results in leaner debug spew.
+#define DEBUG_TYPE "coro-suspend-crossing"
+
+enum { SmallVectorThreshold = 32 };
+
+// Provides a two-way mapping between basic blocks and their indices.
+namespace {
+class BlockToIndexMapping {
+  SmallVector<BasicBlock *, SmallVectorThreshold> V;
+
+public:
+  size_t size() const { return V.size(); }
+
+  BlockToIndexMapping(Function &F) {
+    for (BasicBlock &BB : F)
+      V.push_back(&BB);
+    std::sort(V.begin(), V.end());
+  }
+
+  size_t blockToIndex(BasicBlock *BB) const {
+    auto *I = std::lower_bound(V.begin(), V.end(), BB);
+    assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
+    return I - V.begin();
+  }
+
+  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
+};
+} // end anonymous namespace
+
+// The SuspendCrossingInfo maintains data that allows us to answer the
+// question of whether, given two BasicBlocks A and B, there is a path from A
+// to B that passes through a suspend point.
+//
+// For every basic block 'i' it maintains a BlockData that consists of:
+//   Consumes:  a bit vector which contains a set of indices of blocks that can
+//              reach block 'i'
+//   Kills: a bit vector which contains a set of indices of blocks that can
+//          reach block 'i' via at least one path that crosses a suspend point
+//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
+//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
+//
+namespace {
+struct SuspendCrossingInfo {
+  BlockToIndexMapping Mapping;
+
+  struct BlockData {
+    BitVector Consumes;
+    BitVector Kills;
+    bool Suspend = false;
+    bool End = false;
+  };
+  SmallVector<BlockData, SmallVectorThreshold> Block;
+
+  iterator_range<succ_iterator> successors(BlockData const &BD) const {
+    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+    return llvm::successors(BB);
+  }
+
+  BlockData &getBlockData(BasicBlock *BB) {
+    return Block[Mapping.blockToIndex(BB)];
+  }
+
+  void dump() const;
+  void dump(StringRef Label, BitVector const &BV) const;
+
+  SuspendCrossingInfo(Function &F, coro::Shape &Shape);
+
+  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
+    size_t const DefIndex = Mapping.blockToIndex(DefBB);
+    size_t const UseIndex = Mapping.blockToIndex(UseBB);
+
+    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
+    bool const Result = Block[UseIndex].Kills[DefIndex];
+    DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
+                 << " answer is " << Result << "\n");
+    return Result;
+  }
+
+  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
+    auto *I = cast<Instruction>(U);
+
+    // We rewrote PHINodes, so that only the ones with exactly one incoming
+    // value need to be analyzed.
+    if (auto *PN = dyn_cast<PHINode>(I))
+      if (PN->getNumIncomingValues() > 1)
+        return false;
+
+    BasicBlock *UseBB = I->getParent();
+    return hasPathCrossingSuspendPoint(DefBB, UseBB);
+  }
+
+  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
+    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
+  }
+
+  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
+    return isDefinitionAcrossSuspend(I.getParent(), U);
+  }
+};
+} // end anonymous namespace
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
+                                                BitVector const &BV) const {
+  dbgs() << Label << ":";
+  for (size_t I = 0, N = BV.size(); I < N; ++I)
+    if (BV[I])
+      dbgs() << " " << Mapping.indexToBlock(I)->getName();
+  dbgs() << "\n";
+}
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
+  for (size_t I = 0, N = Block.size(); I < N; ++I) {
+    BasicBlock *const B = Mapping.indexToBlock(I);
+    dbgs() << B->getName() << ":\n";
+    dump("   Consumes", Block[I].Consumes);
+    dump("      Kills", Block[I].Kills);
+  }
+  dbgs() << "\n";
+}
+
+SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
+    : Mapping(F) {
+  const size_t N = Mapping.size();
+  Block.resize(N);
+
+  // Initialize every block so that it consumes itself
+  for (size_t I = 0; I < N; ++I) {
+    auto &B = Block[I];
+    B.Consumes.resize(N);
+    B.Kills.resize(N);
+    B.Consumes.set(I);
+  }
+
+  // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
+  // the code beyond coro.end is reachable during initial invocation of the
+  // coroutine.
+  for (auto *CE : Shape.CoroEnds)
+    getBlockData(CE->getParent()).End = true;
+
+  // Mark all suspend blocks and indicate that they kill everything they
+  // consume. Note that crossing coro.save is used to indicate suspend, as any
+  // code between coro.save and coro.suspend may resume the coroutine and all of
+  // the state needs to be saved by that time.
+  for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
+    CoroSaveInst *const CoroSave = CSI->getCoroSave();
+    BasicBlock *const CoroSaveBB = CoroSave->getParent();
+    auto &B = getBlockData(CoroSaveBB);
+    B.Suspend = true;
+    B.Kills |= B.Consumes;
+  }
+
+  // Iterate propagating consumes and kills until they stop changing
+  int Iteration = 0;
+
+  bool Changed;
+  do {
+    DEBUG(dbgs() << "iteration " << ++Iteration);
+    DEBUG(dbgs() << "==============\n");
+
+    Changed = false;
+    for (size_t I = 0; I < N; ++I) {
+      auto &B = Block[I];
+      for (BasicBlock *SI : successors(B)) {
+
+        auto SuccNo = Mapping.blockToIndex(SI);
+
+        // Save the Consumes and Kills bitsets so that it is easy to see
+        // if anything changed after propagation.
+        auto &S = Block[SuccNo];
+        auto SavedConsumes = S.Consumes;
+        auto SavedKills = S.Kills;
+
+        // Propagate Kills and Consumes from block B into its successor S.
+        S.Consumes |= B.Consumes;
+        S.Kills |= B.Kills;
+
+        // If block B is a suspend block, it should propagate kills into its
+        // successor for every block that B consumes.
+        if (B.Suspend) {
+          S.Kills |= B.Consumes;
+        }
+        if (S.Suspend) {
+          // If block S is a suspend block, it should kill all of the blocks it
+          // consumes.
+          S.Kills |= S.Consumes;
+        } else if (S.End) {
+          // If block S is an end block, it should not propagate kills as the
+          // blocks following coro.end() are reached during initial invocation
+          // of the coroutine while all the data are still available on the
+          // stack or in the registers.
+          S.Kills.reset();
+        } else {
+          // This is reached when block S is neither Suspend nor coro.end, and
+          // it needs to make sure that it is not in its own kill set.
+          S.Kills.reset(SuccNo);
+        }
+
+        // See if anything changed.
+        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
+
+        if (S.Kills != SavedKills) {
+          DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
+                       << "\n");
+          DEBUG(dump("S.Kills", S.Kills));
+          DEBUG(dump("SavedKills", SavedKills));
+        }
+        if (S.Consumes != SavedConsumes) {
+          DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
+          DEBUG(dump("S.Consume", S.Consumes));
+          DEBUG(dump("SavedCons", SavedConsumes));
+        }
+      }
+    }
+  } while (Changed);
+  DEBUG(dump());
+}
+
+#undef DEBUG_TYPE // "coro-suspend-crossing"
+#define DEBUG_TYPE "coro-frame"
+
+// We build up the list of spills for every case where a use is separated
+// from the definition by a suspend point.
+
+struct Spill : std::pair<Value *, Instruction *> {
+  using base = std::pair<Value *, Instruction *>;
+
+  Spill(Value *Def, User *U) : base(Def, cast<Instruction>(U)) {}
+
+  Value *def() const { return first; }
+  Instruction *user() const { return second; }
+  BasicBlock *userBlock() const { return second->getParent(); }
+
+  std::pair<Value *, BasicBlock *> getKey() const {
+    return {def(), userBlock()};
+  }
+
+  bool operator<(Spill const &rhs) const { return getKey() < rhs.getKey(); }
+};
+
+// Note that there may be more than one record with the same value of Def in
+// the SpillInfo vector.
+using SpillInfo = SmallVector<Spill, 8>;
+
+#ifndef NDEBUG
+static void dump(StringRef Title, SpillInfo const &Spills) {
+  dbgs() << "------------- " << Title << "--------------\n";
+  Value *CurrentValue = nullptr;
+  for (auto const &E : Spills) {
+    if (CurrentValue != E.def()) {
+      CurrentValue = E.def();
+      CurrentValue->dump();
+    }
+    dbgs() << "   user: ";
+    E.user()->dump();
+  }
+}
+#endif
 
 // Build a struct that will keep state for an active coroutine.
 //   struct f.frame {
@@ -48,12 +311,35 @@ static StructType *buildFrameType(Functi
     report_fatal_error("Cannot handle coroutine with this many suspend points");
 
   SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getInt32Ty(C)};
-  // TODO: Populate from Spills.
+  Value *CurrentDef = nullptr;
+
+  // Create an entry for every spilled value.
+  for (auto const &S : Spills) {
+    if (CurrentDef == S.def())
+      continue;
+
+    CurrentDef = S.def();
+
+    Type *Ty = nullptr;
+    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef))
+      Ty = AI->getAllocatedType();
+    else
+      Ty = CurrentDef->getType();
+
+    Types.push_back(Ty);
+  }
   FrameTy->setBody(Types);
 
   return FrameTy;
 }
 
+// Returns the index of the last non-spill field in the coroutine frame:
+// 2 if there is no coroutine promise specified, or 3 if there is.
+static unsigned getLastNonSpillIndex(coro::Shape &Shape) {
+  // TODO: Add support for coroutine promise.
+  return 2;
+}
+
 // Replace all alloca and SSA values that are accessed across suspend points
 // with GetElementPointer from coroutine frame + loads and stores. Create an
 // AllocaSpillBB that will become the new entry block for the resume parts of
@@ -82,22 +368,274 @@ static Instruction *insertSpills(SpillIn
   PointerType *FramePtrTy = Shape.FrameTy->getPointerTo();
   auto *FramePtr =
       cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+  Type *FrameTy = FramePtrTy->getElementType();
 
-  // TODO: Insert Spills.
+  Value *CurrentValue = nullptr;
+  BasicBlock *CurrentBlock = nullptr;
+  Value *CurrentReload = nullptr;
+  unsigned Index = getLastNonSpillIndex(Shape);
+
+  // We need to keep track of any allocas that need "spilling". Since they
+  // will live in the coroutine frame now, all accesses to them need to be
+  // changed, not just the accesses across suspend points. We remember the
+  // allocas and their indices so they can be handled once we have processed
+  // all the spills.
+  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
+
+  // Create a load instruction to reload the spilled value from the coroutine
+  // frame.
+  auto CreateReload = [&](Instruction *InsertBefore) {
+    Builder.SetInsertPoint(InsertBefore);
+    auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
+                                                 CurrentValue->getName() +
+                                                     Twine(".reload.addr"));
+    return isa<AllocaInst>(CurrentValue)
+               ? G
+               : Builder.CreateLoad(G,
+                                    CurrentValue->getName() + Twine(".reload"));
+  };
+
+  for (auto const &E : Spills) {
+    // If we have not seen the value, generate a spill.
+    if (CurrentValue != E.def()) {
+      CurrentValue = E.def();
+      CurrentBlock = nullptr;
+      CurrentReload = nullptr;
+
+      ++Index;
+
+      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
+        // A spilled AllocaInst will be replaced with a GEP from the coroutine
+        // frame, so no spill is required.
+        Allocas.emplace_back(AI, Index);
+        if (!AI->isStaticAlloca())
+          report_fatal_error("Coroutines cannot handle non static allocas yet");
+      } else {
+        // Otherwise, create a store instruction storing the value into the
+        // coroutine frame. For an argument, we will place the store instruction
+        // right after the coroutine frame pointer instruction, i.e. the bitcast
+        // of coro.begin from i8* to %f.frame*. For all other values, the spill
+        // is placed immediately after the definition.
+        Builder.SetInsertPoint(
+            isa<Argument>(CurrentValue)
+                ? FramePtr->getNextNode()
+                : dyn_cast<Instruction>(E.def())->getNextNode());
+
+        auto *G = Builder.CreateConstInBoundsGEP2_32(
+            FrameTy, FramePtr, 0, Index,
+            CurrentValue->getName() + Twine(".spill.addr"));
+        Builder.CreateStore(CurrentValue, G);
+      }
+    }
+
+    // If we have not seen the use block, generate a reload in it.
+    if (CurrentBlock != E.userBlock()) {
+      CurrentBlock = E.userBlock();
+      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
+    }
+
+    // Replace all uses of CurrentValue in the current instruction with reload.
+    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
+  }
 
-  auto *FramePtrBB = FramePtr->getParent();
+  BasicBlock *FramePtrBB = FramePtr->getParent();
   Shape.AllocaSpillBlock =
       FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
   Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(),
                                           "PostSpill");
-  // TODO: Insert geps for alloca moved to coroutine frame.
 
+  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+  // If we found any allocas, replace all of their remaining uses with Geps.
+  for (auto &P : Allocas) {
+    auto *G =
+        Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
+    ReplaceInstWithInst(P.first, cast<Instruction>(G));
+  }
   return FramePtr;
 }
 
+static void rewritePHIs(BasicBlock &BB) {
+  // For every incoming edge we will create a block holding all
+  // incoming values in single-entry PHI nodes.
+  //
+  // loop:
+  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
+  //
+  // It will create:
+  //
+  // loop.from.entry:
+  //    %n.loop = phi i32 [%n, %entry]
+  //    br label %loop
+  // loop.from.loop:
+  //    %inc.loop = phi i32 [%inc, %loop]
+  //    br label %loop
+  //
+  // After this rewrite, further analysis will ignore any phi nodes with more
+  // than one incoming edge.
+
+  // TODO: Simplify PHINodes in the basic block to remove duplicate
+  // predecessors.
+
+  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
+  for (BasicBlock *Pred : Preds) {
+    auto *IncomingBB = SplitEdge(Pred, &BB);
+    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
+    auto *PN = cast<PHINode>(&BB.front());
+    do {
+      int Index = PN->getBasicBlockIndex(IncomingBB);
+      Value *V = PN->getIncomingValue(Index);
+      PHINode *InputV = PHINode::Create(
+          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
+          &IncomingBB->front());
+      InputV->addIncoming(V, Pred);
+      PN->setIncomingValue(Index, InputV);
+      PN = dyn_cast<PHINode>(PN->getNextNode());
+    } while (PN);
+  }
+}
+
+static void rewritePHIs(Function &F) {
+  SmallVector<BasicBlock *, 8> WorkList;
+
+  for (BasicBlock &BB : F)
+    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
+      if (PN->getNumIncomingValues() > 1)
+        WorkList.push_back(&BB);
+
+  for (BasicBlock *BB : WorkList)
+    rewritePHIs(*BB);
+}
+
+// Check for instructions that we can recreate on resume as opposed to spill
+// the result into a coroutine frame.
+static bool materializable(Instruction &V) {
+  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
+         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
+}
+
+// For every use of the value that is across suspend point, recreate that value
+// after a suspend point.
+static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
+                                              SpillInfo const &Spills) {
+  BasicBlock *CurrentBlock = nullptr;
+  Instruction *CurrentMaterialization = nullptr;
+  Instruction *CurrentDef = nullptr;
+
+  for (auto const &E : Spills) {
+    // If it is a new definition, update CurrentXXX variables.
+    if (CurrentDef != E.def()) {
+      CurrentDef = cast<Instruction>(E.def());
+      CurrentBlock = nullptr;
+      CurrentMaterialization = nullptr;
+    }
+
+    // If we have not seen this block, materialize the value.
+    if (CurrentBlock != E.userBlock()) {
+      CurrentBlock = E.userBlock();
+      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
+      CurrentMaterialization->setName(CurrentDef->getName());
+      CurrentMaterialization->insertBefore(
+          &*CurrentBlock->getFirstInsertionPt());
+    }
+
+    if (auto *PN = dyn_cast<PHINode>(E.user())) {
+      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+                                                "values in the PHINode");
+      PN->replaceAllUsesWith(CurrentMaterialization);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Replace all uses of CurrentDef in the current instruction with the
+    // CurrentMaterialization for the block.
+    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
+  }
+}
+
+// Splits the block at a particular instruction unless it is the first
+// instruction in the block with a single predecessor.
+static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
+  auto *BB = I->getParent();
+  if (&BB->front() == I) {
+    if (BB->getSinglePredecessor()) {
+      BB->setName(Name);
+      return BB;
+    }
+  }
+  return BB->splitBasicBlock(I, Name);
+}
+
+// Split above and below a particular instruction so that it
+// will be all alone by itself in a block.
+static void splitAround(Instruction *I, const Twine &Name) {
+  splitBlockIfNotFirst(I, Name);
+  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
+}
+
 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
+
+  // Make sure that all coro.saves and the fallthrough coro.end are in their
+  // own block to simplify the logic of building up SuspendCrossing data.
+  for (CoroSuspendInst *CSI : Shape.CoroSuspends)
+    splitAround(CSI->getCoroSave(), "CoroSave");
+
+  // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places
+  // the fallthrough coro.end as the first element of CoroEnds array.
+  splitAround(Shape.CoroEnds.front(), "CoroEnd");
+
+  // Transform multi-edge PHI nodes so that any value feeding into a PHI will
+  // never have its definition separated from the PHI by a suspend point.
+  rewritePHIs(F);
+
+  // Build suspend crossing info.
+  SuspendCrossingInfo Checker(F, Shape);
+
+  IRBuilder<> Builder(F.getContext());
   SpillInfo Spills;
-  // TODO: Compute Spills (incoming in later patches).
+
+  // See if there are materializable instructions across suspend points.
+  for (Instruction &I : instructions(F))
+    if (materializable(I))
+      for (User *U : I.users())
+        if (Checker.isDefinitionAcrossSuspend(I, U))
+          Spills.emplace_back(&I, U);
+
+  // Rewrite materializable instructions to be materialized at the use point.
+  std::sort(Spills.begin(), Spills.end());
+  DEBUG(dump("Materializations", Spills));
+  rewriteMaterializableInstructions(Builder, Spills);
+
+  // Collect the spills for arguments and other not-materializable values.
+  Spills.clear();
+  for (Argument &A : F.getArgumentList())
+    for (User *U : A.users())
+      if (Checker.isDefinitionAcrossSuspend(A, U))
+        Spills.emplace_back(&A, U);
+
+  for (Instruction &I : instructions(F)) {
+    // token returned by CoroSave is an artifact of how we build save/suspend
+    // pairs and should not be part of the Coroutine Frame
+    if (isa<CoroSaveInst>(&I))
+      continue;
+    // CoroBeginInst returns a handle to a coroutine which is passed as a sole
+    // parameter to .resume and .cleanup parts and should not go into coroutine
+    // frame.
+    if (isa<CoroBeginInst>(&I))
+      continue;
+
+    for (User *U : I.users())
+      if (Checker.isDefinitionAcrossSuspend(I, U)) {
+        // We cannot spill a token.
+        if (I.getType()->isTokenTy())
+          report_fatal_error(
+              "token definition is separated from the use by a suspend point");
+        assert(!materializable(I) &&
+               "rewriteMaterializable did not do its job");
+        Spills.emplace_back(&I, U);
+      }
+  }
+  std::sort(Spills.begin(), Spills.end());
+  DEBUG(dump("Spills", Spills));
 
   Shape.FrameTy = buildFrameType(F, Shape, Spills);
   Shape.FramePtr = insertSpills(Spills, Shape);

Added: llvm/trunk/test/Transforms/Coroutines/ex0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex0.ll?rev=279609&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex0.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex0.ll Tue Aug 23 23:44:35 2016
@@ -0,0 +1,59 @@
+; First example from Doc/Coroutines.rst (two block loop)
+; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %cleanup]
+resume:
+  %inc = add i32 %n.val, 1
+  br label %loop
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call void @llvm.coro.end(i8* %hdl, i1 0)  
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK: entry:
+; CHECK:      call void @print(i32 4)
+; CHECK:      call void @print(i32 5)
+; CHECK:      call void @print(i32 6)
+; CHECK:      ret i32 0
+}
+
+declare token @llvm.coro.id(i32, i8*, i8*)
+declare i8* @llvm.coro.alloc(token)
+declare i8* @llvm.coro.free(i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+  
+declare i8* @llvm.coro.begin(token, i8*)
+declare void @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex1.ll?rev=279609&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex1.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex1.ll Tue Aug 23 23:44:35 2016
@@ -0,0 +1,54 @@
+; First example from Doc/Coroutines.rst (one block loop)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call void @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK-NEXT: entry:
+; CHECK:      call void @print(i32 4)
+; CHECK:      call void @print(i32 5)
+; CHECK:      call void @print(i32 6)
+; CHECK:      ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(i8*)
+declare void @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Modified: llvm/trunk/test/Transforms/Coroutines/smoketest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/smoketest.ll?rev=279609&r1=279608&r2=279609&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/smoketest.ll (original)
+++ llvm/trunk/test/Transforms/Coroutines/smoketest.ll Tue Aug 23 23:44:35 2016
@@ -1,5 +1,5 @@
 ; Test that all coroutine passes run in the correct order at all optimization
-; levels and -disable-coroutines removes coroutine passes from the pipeline.
+; levels and -enable-coroutines adds coroutine passes to the pipeline.
 ;
 ; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O0 2>&1 | FileCheck %s
 ; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O1 2>&1 | FileCheck %s