[llvm] [LLVM][Coroutines] Perform HALO on "coro_must_elide" coroutines (PR #98974)

Yuxuan Chen via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 00:18:47 PDT 2024


https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/98974

>From 66f9d412776576c3500fd20ab204eca69f8022a1 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen <ych at meta.com>
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] Implement LLVM bits

---
 llvm/include/llvm/IR/Instruction.h            |   4 +
 .../Coroutines/CoroAnnotationElide.h          |  34 +++++
 llvm/lib/IR/Metadata.cpp                      |  16 +++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   3 +-
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/Coroutines/CMakeLists.txt |   1 +
 .../Coroutines/CoroAnnotationElide.cpp        | 119 +++++++++++++++++
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   4 +
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 121 ++++++++++++++----
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  27 ++++
 llvm/test/Other/new-pm-defaults.ll            |   1 +
 .../Other/new-pm-thinlto-postlink-defaults.ll |   1 +
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |   1 +
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |   1 +
 .../Other/new-pm-thinlto-prelink-defaults.ll  |   1 +
 .../new-pm-thinlto-prelink-pgo-defaults.ll    |   1 +
 ...w-pm-thinlto-prelink-samplepgo-defaults.ll |   1 +
 llvm/test/Transforms/Coroutines/ArgAddr.ll    |   2 +-
 .../Transforms/Coroutines/coro-alloca-07.ll   |   2 +-
 .../coro-alloca-loop-carried-address.ll       |   2 +-
 .../Coroutines/coro-lifetime-end.ll           |   6 +-
 .../Coroutines/coro-spill-after-phi.ll        |   2 +-
 .../Transforms/Coroutines/coro-split-00.ll    |   7 +
 .../Coroutines/coro-transform-must-elide.ll   |  77 +++++++++++
 25 files changed, 403 insertions(+), 33 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
 create mode 100644 llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
 create mode 100644 llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll

diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index c27572300d506..2551ecaa6a19d 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -445,6 +445,10 @@ class Instruction : public User,
   /// !annotation metadata, append the tuple to
   /// the existing node.
   void addAnnotationMetadata(SmallVector<StringRef> Annotations);
+
+  /// Returns true if this instruction's !annotation metadata contains \p Name.
+  bool hasAnnotationMetadata(StringRef Name) const;
+
   /// Returns the AA metadata for this instruction.
   AAMDNodes getAAMetadata() const;
 
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
new file mode 100644
index 0000000000000..dae1cc0c689a3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
@@ -0,0 +1,34 @@
+//===- CoroAnnotationElide.h - Optimizing a coro_must_elide call ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This pass transforms all Call or Invoke instructions that are annotated
+// "coro_must_elide" to call the `.noalloc` variant of coroutine instead.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct CoroAnnotationElidePass : PassInfoMixin<CoroAnnotationElidePass> {
+  CoroAnnotationElidePass() {}
+  /// CGSCC pass entry point; the definition lives in CoroAnnotationElide.cpp.
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+  /// This pass does not declare itself as required.
+  static bool isRequired() { return false; }
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 3aec7140510a6..feaf79609652f 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1703,6 +1703,22 @@ void Instruction::addAnnotationMetadata(StringRef Name) {
   setMetadata(LLVMContext::MD_annotation, MD);
 }
 
+/// Returns true if this instruction's !annotation tuple has an MDString
+/// operand equal to \p Name. Operands that are not MDStrings are skipped.
+/// Returns false when no !annotation metadata is attached.
+bool Instruction::hasAnnotationMetadata(StringRef Name) const {
+  MDNode *Annotations = getMetadata(LLVMContext::MD_annotation);
+  if (Annotations == nullptr)
+    return false;
+
+  for (const MDOperand &Op : cast<MDTuple>(Annotations)->operands()) {
+    const auto *Str = dyn_cast<MDString>(Op.get());
+    if (Str != nullptr && Str->getString() == Name)
+      return true;
+  }
+  return false;
+}
+
 AAMDNodes Instruction::getAAMetadata() const {
   AAMDNodes Result;
   // Not using Instruction::hasMetadata() because we're not interested in
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 929690c2c74d6..6b7d5089b1727 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -134,6 +134,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/CFGuard.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 4fd5ee1946bb7..2d5a6e6861f23 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
 #include "llvm/Transforms/Coroutines/CoroEarly.h"
@@ -968,8 +969,8 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
   // it's been modified since.
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
       RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
-
   MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+  MainCGPipeline.addPass(CoroAnnotationElidePass());
 
   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 3b92823cd283b..f3d77f8def4a6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -234,6 +234,7 @@ CGSCC_PASS("attributor-light-cgscc", AttributorLightCGSCCPass())
 CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
 CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
+CGSCC_PASS("coro-annotation-elide", CoroAnnotationElidePass())
 #undef CGSCC_PASS
 
 #ifndef CGSCC_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Coroutines/CMakeLists.txt b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
index 2139446e5ff95..b4b5812d97d89 100644
--- a/llvm/lib/Transforms/Coroutines/CMakeLists.txt
+++ b/llvm/lib/Transforms/Coroutines/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_component_library(LLVMCoroutines
   Coroutines.cpp
+  CoroAnnotationElide.cpp
   CoroCleanup.cpp
   CoroConditionalWrapper.cpp
   CoroEarly.cpp
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
new file mode 100644
index 0000000000000..bf75ecd9203fc
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -0,0 +1,119 @@
+//===- CoroAnnotationElide.cpp - Rewrite coro_must_elide calls ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
+
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coro-annotation-elide"
+
+#define CORO_MUST_ELIDE_ANNOTATION "coro_must_elide"
+
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
+  for (Instruction &Inst : F->getEntryBlock()) // Skip the leading allocas.
+    if (!isa<AllocaInst>(Inst))
+      return &Inst;
+  llvm_unreachable("no terminator in the entry block");
+}
+
+/// Stack-allocates \p FrameSize bytes in \p Caller's entry block as the frame.
+static Value *allocateFrameInCaller(Function *Caller, uint64_t FrameSize,
+                                    Align FrameAlign) {
+  LLVMContext &Ctx = Caller->getContext();
+  auto Where = getFirstNonAllocaInTheEntryBlock(Caller)->getIterator();
+  unsigned AllocaAS = Caller->getDataLayout().getAllocaAddrSpace();
+  ArrayType *BufferTy = ArrayType::get(Type::getInt8Ty(Ctx), FrameSize);
+  auto *Buffer = new AllocaInst(BufferTy, AllocaAS, "", Where);
+  Buffer->setAlignment(FrameAlign);
+  return new BitCastInst(Buffer, PointerType::getUnqual(Ctx), "vFrame", Where);
+}
+
+/// Replaces \p CB with a call/invoke of \p NewCallee taking CB's arguments
+/// plus a frame pointer allocated in \p Caller, then erases \p CB.
+static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
+                        uint64_t FrameSize, Align FrameAlign) {
+  auto *FramePtr = allocateFrameInCaller(Caller, FrameSize, FrameAlign);
+  auto Pos = CB->getIterator();
+  FunctionType *FTy = NewCallee->getFunctionType();
+  SmallVector<Value *, 4> NewArgs;
+  NewArgs.append(CB->arg_begin(), CB->arg_end());
+  NewArgs.push_back(FramePtr);
+  // Operand bundles have to be supplied when the call is created; a call
+  // built without them has no bundle slots to copy into afterwards.
+  SmallVector<OperandBundleDef, 1> Bundles;
+  CB->getOperandBundlesAsDefs(Bundles);
+  // TODO: See CallBase::Create(CallBase*, ...)
+  CallBase *NewCB = nullptr;
+  if (auto *CI = dyn_cast<CallInst>(CB)) {
+    auto *NewCI = CallInst::Create(FTy, NewCallee, NewArgs, Bundles, "", Pos);
+    NewCI->setTailCallKind(CI->getTailCallKind());
+    NewCB = NewCI;
+  } else if (auto *II = dyn_cast<InvokeInst>(CB)) {
+    NewCB = InvokeInst::Create(FTy, NewCallee, II->getNormalDest(),
+                               II->getUnwindDest(), NewArgs, Bundles, "", Pos);
+  } else {
+    llvm_unreachable("CallBase should either be Call or Invoke!");
+  }
+
+  NewCB->setCallingConv(CB->getCallingConv());
+  NewCB->setAttributes(CB->getAttributes());
+  NewCB->setDebugLoc(CB->getDebugLoc());
+  CB->replaceAllUsesWith(NewCB);
+  CB->eraseFromParent();
+}
+
+/// Redirects "coro_must_elide"-annotated calls made from presplit coroutines
+/// to the callee's ".noalloc" variant, placing the callee frame in the caller.
+PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
+                                               CGSCCAnalysisManager &AM,
+                                               LazyCallGraph &CG,
+                                               CGSCCUpdateResult &UR) {
+  bool Changed = false;
+  for (LazyCallGraph::Node &N : C) {
+    Function *Callee = &N.getFunction();
+    Function *NewCallee = Callee->getParent()->getFunction(
+        (Callee->getName() + ".noalloc").str());
+    // The frame pointer is read from the last parameter, so one must exist.
+    if (!NewCallee || NewCallee->arg_empty())
+      continue;
+    auto FramePtrArgPosition = NewCallee->arg_size() - 1;
+    auto FrameSize =
+        NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
+    auto FrameAlign =
+        NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();
+    // Snapshot direct calls only (processCall erases them); a use of Callee
+    // as a call argument must not be rewritten.
+    SmallVector<CallBase *, 4> Users;
+    for (Use &U : Callee->uses())
+      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+        if (CB->isCallee(&U))
+          Users.push_back(CB);
+    for (auto *CB : Users) {
+      auto *Caller = CB->getFunction();
+      if (Caller && Caller->isPresplitCoroutine() &&
+          CB->hasAnnotationMetadata(CORO_MUST_ELIDE_ANNOTATION)) {
+        processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);
+        // NOTE(review): verify this graph update when Callee still has uses.
+        C.getOuterRefSCC().replaceNodeFunction(N, *NewCallee);
+        Changed = true;
+      }
+    }
+  }
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5716fd0ea4ab9..d91cccd99a703 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M,
                         const std::initializer_list<StringRef>);
 void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
 
+void suppressCoroAllocs(CoroIdInst *CoroId);
+void suppressCoroAllocs(LLVMContext &Context,
+                        ArrayRef<CoroAllocInst *> CoroAllocs);
+
 /// Attempts to rewrite the location operand of debug intrinsics in terms of
 /// the coroutine frame pointer, folding pointer offsets into the DIExpression
 /// of the intrinsic.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9e4da5f8ca961..aa3078d547351 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
@@ -1179,6 +1180,14 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
+/// Returns the allocation size of Shape.FrameTy under the DataLayout of the
+/// module containing the last recorded coro.size call.
+static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
+  CoroSizeInst *LastSize = Shape.CoroSizes.back();
+  const DataLayout &Layout = LastSize->getModule()->getDataLayout();
+  return Layout.getTypeAllocSize(Shape.FrameTy);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
     updateAsyncFuncPointerContextSize(Shape);
@@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
 
   // In the same function all coro.sizes should have the same result type.
   auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+  auto *SizeConstant =
+      ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));
 
   for (CoroSizeInst *CS : Shape.CoroSizes) {
     CS->replaceAllUsesWith(SizeConstant);
@@ -1455,6 +1462,64 @@ struct SwitchCoroutineSplitter {
     setCoroInfo(F, Shape, Clones);
   }
 
+  /// Clones the ramp function \p F into "<name>.noalloc", which receives the
+  /// coroutine frame through an extra trailing pointer parameter. The clone
+  /// is inserted into F's module, appended to \p Clones, and returned.
+  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
+                                        SmallVectorImpl<Function *> &Clones) {
+    auto *OrigFnTy = F.getFunctionType();
+    auto OldParams = OrigFnTy->params();
+
+    SmallVector<Type *> NewParams;
+    NewParams.reserve(OldParams.size() + 1);
+    NewParams.append(OldParams.begin(), OldParams.end());
+    NewParams.push_back(PointerType::getUnqual(Shape.FrameTy));
+
+    auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
+                                      OrigFnTy->isVarArg());
+    Function *NoAllocF =
+        Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");
+    ValueToValueMapTy VMap;
+    unsigned int Idx = 0;
+    for (const auto &I : F.args())
+      VMap[&I] = NoAllocF->getArg(Idx++);
+    SmallVector<ReturnInst *, 4> Returns;
+    CloneFunctionInto(NoAllocF, &F, VMap,
+                      CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+    if (Shape.CoroBegin) {
+      // cast<>, not cast_if_present<>: the result is dereferenced right away,
+      // so an unmapped coro.begin must assert instead of yielding null.
+      auto *NewCoroBegin = cast<CoroBeginInst>(VMap[Shape.CoroBegin]);
+      auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId());
+      coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
+      coro::suppressCoroAllocs(NewCoroId);
+      // After the loop above, Idx is the index of the appended frame pointer.
+      NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx));
+      NewCoroBegin->eraseFromParent();
+    }
+
+    Module *M = F.getParent();
+    M->getFunctionList().insert(M->end(), NoAllocF);
+
+    removeUnreachableBlocks(*NoAllocF);
+    auto NewAttrs = NoAllocF->getAttributes();
+    // The frame pointer was appended as the last argument of the new function.
+    auto FrameIdx = NoAllocF->arg_size() - 1;
+    // When we elide allocation, we read these attributes to determine the
+    // frame size and alignment.
+    addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx,
+                         Shape.FrameSize, Shape.FrameAlign,
+                         /*NoAlias=*/false);
+    NoAllocF->setAttributes(NewAttrs);
+
+    Clones.push_back(NoAllocF);
+    // Reset the original function's coro info, make the new noalloc variant
+    // connected to the original ramp function.
+    setCoroInfo(F, Shape, Clones);
+    return NoAllocF;
+  }
+
 private:
   // Create a resume clone by cloning the body of the original function, setting
   // new entry block and replacing coro.suspend an appropriate value to force
@@ -1913,6 +1978,21 @@ class PrettyStackTraceFunction : public PrettyStackTraceEntry {
 };
 } // namespace
 
+/// Remove calls to llvm.coro.end in the original function.
+static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
+  if (Shape.ABI == coro::ABI::Switch) {
+    // Switch ABI: each coro.end is replaced by `false` and erased.
+    for (AnyCoroEndInst *End : Shape.CoroEnds) {
+      End->replaceAllUsesWith(ConstantInt::getFalse(End->getContext()));
+      End->eraseFromParent();
+    }
+    return;
+  }
+  // Every other ABI lowers its coro.ends through replaceCoroEnd.
+  for (AnyCoroEndInst *End : Shape.CoroEnds)
+    replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
+}
+
 static coro::Shape
 splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
                TargetTransformInfo &TTI, bool OptimizeFrame,
@@ -1932,10 +2012,10 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
   simplifySuspendPoints(Shape);
   buildCoroutineFrame(F, Shape, TTI, MaterializableCallback);
   replaceFrameSizeAndAlignment(Shape);
-
+  bool isNoSuspendCoroutine = Shape.CoroSuspends.empty();
   // If there are no suspend points, no split required, just remove
   // the allocation and deallocation blocks, they are not needed.
-  if (Shape.CoroSuspends.empty()) {
+  if (isNoSuspendCoroutine) {
     handleNoSuspendCoroutine(Shape);
   } else {
     switch (Shape.ABI) {
@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
   for (DbgVariableRecord *DVR : DbgVariableRecords)
     coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
                            false /*UseEntryValue*/);
-  return Shape;
-}
 
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
-  if (Shape.ABI != coro::ABI::Switch) {
-    for (auto *End : Shape.CoroEnds) {
-      replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
-    }
-  } else {
-    for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
-      auto &Context = End->getContext();
-      End->replaceAllUsesWith(ConstantInt::getFalse(Context));
-      End->eraseFromParent();
-    }
+  removeCoroEndsFromRampFunction(Shape);
+
+  if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) {
+    SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones);
   }
+  return Shape;
 }
 
 static void updateCallGraphAfterCoroutineSplit(
@@ -2108,16 +2179,16 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
   // Split all the coroutines.
   for (LazyCallGraph::Node *N : Coroutines) {
     Function &F = N->getFunction();
+
     LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName()
                       << "\n");
     F.setSplittedCoroutine();
 
     SmallVector<Function *, 4> Clones;
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-    const coro::Shape Shape =
+    coro::Shape Shape =
         splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
                        OptimizeFrame, MaterializableCallback);
-    removeCoroEndsFromRampFunction(Shape);
     updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
 
     ORE.emit([&]() {
@@ -2135,9 +2206,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
     }
   }
 
-    for (auto *PrepareFn : PrepareFns) {
-      replaceAllPrepares(PrepareFn, CG, C);
-    }
+  for (auto *PrepareFn : PrepareFns) {
+    replaceAllPrepares(PrepareFn, CG, C);
+  }
 
   return PreservedAnalyses::none();
 }
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 1a92bc1636257..be257339e0ac4 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -145,6 +145,33 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
   }
 }
 
+// Gathers the llvm.coro.alloc users of \p CoroId and suppresses all of them.
+void coro::suppressCoroAllocs(CoroIdInst *CoroId) {
+  SmallVector<CoroAllocInst *, 4> Allocs;
+  for (User *IdUser : CoroId->users()) {
+    auto *Alloc = dyn_cast<CoroAllocInst>(IdUser);
+    if (Alloc != nullptr)
+      Allocs.push_back(Alloc);
+  }
+  if (!Allocs.empty())
+    coro::suppressCoroAllocs(CoroId->getContext(), Allocs);
+}
+
+// Folds each llvm.coro.alloc in \p CoroAllocs to `false` and erases it. The
+// frontend is expected to guard the dynamic allocation on coro.alloc:
+//   id = coro.id(...)
+//   mem = coro.alloc(id) ? malloc(coro.size()) : 0;
+//   coro.begin(id, mem)
+// so a false result means the allocation is never taken.
+void coro::suppressCoroAllocs(LLVMContext &Context,
+                              ArrayRef<CoroAllocInst *> CoroAllocs) {
+  ConstantInt *NoAlloc = ConstantInt::getFalse(Context);
+  for (CoroAllocInst *Alloc : CoroAllocs) {
+    Alloc->replaceAllUsesWith(NoAlloc);
+    Alloc->eraseFromParent();
+  }
+}
+
 static void clear(coro::Shape &Shape) {
   Shape.CoroBegin = nullptr;
   Shape.CoroEnds.clear();
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 588337c15625e..55dbdb1b8366d 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -226,6 +226,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf83..fcf84dc5e1105 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -153,6 +153,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434a..4d5b5e733a87c 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -137,6 +137,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4b..056d6f41ce5a7 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -145,6 +145,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
index 42ef49f8f7c7e..1a7cf77b43fca 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -185,6 +185,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index e74f88c1a3bf9..47c36a646e556 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -184,6 +184,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 210a4ef1f7664..9e1862d50a112 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -149,6 +149,7 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Running pass: CoroSplitPass
+; CHECK-O-NEXT: Running pass: CoroAnnotationElidePass
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll
index 1fbc8e1d49767..6c18cc19a9c0c 100644
--- a/llvm/test/Transforms/Coroutines/ArgAddr.ll
+++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll
@@ -5,7 +5,7 @@
 define nonnull ptr @f(i32 %n) presplitcoroutine {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[N:%.*]], ptr [[N_ADDR]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i32 24)
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
index c81bf333f2059..914fd87ccdffc 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
@@ -62,7 +62,7 @@ declare void @free(ptr)
 
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i32 48)
 ; CHECK-NEXT:    [[HDL:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @f.resume, ptr [[HDL]], align 8
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll b/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll
index 412327a49dcf2..b132f79f13db1 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-loop-carried-address.ll
@@ -7,7 +7,7 @@
 define void @foo() presplitcoroutine {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @foo.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 40)
 ; CHECK-NEXT:    [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @foo.resume, ptr [[VFRAME]], align 8
diff --git a/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
index 330c61360e20a..d0b856865c215 100644
--- a/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
+++ b/llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
@@ -13,7 +13,7 @@ declare void @consume.i8.array(ptr)
 define void @HasNoLifetimeEnd() presplitcoroutine {
 ; CHECK-LABEL: define void @HasNoLifetimeEnd() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
 ; CHECK-NEXT:    [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8
@@ -50,7 +50,7 @@ exit:
 define void @LifetimeEndAfterCoroEnd() presplitcoroutine {
 ; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
 ; CHECK-NEXT:    [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8
@@ -88,7 +88,7 @@ exit:
 define void @BranchWithoutLifetimeEnd() presplitcoroutine {
 ; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i64 16)
 ; CHECK-NEXT:    [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8
diff --git a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
index cbe57a8d61132..41b53d89c5dfe 100644
--- a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
+++ b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll
@@ -8,7 +8,7 @@
 define ptr @f(i1 %n) presplitcoroutine {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @f.resumers)
+; CHECK-NEXT:    [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @{{.*}})
 ; CHECK-NEXT:    [[ALLOC:%.*]] = call ptr @malloc(i32 32)
 ; CHECK-NEXT:    [[HDL:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
 ; CHECK-NEXT:    store ptr @f.resume, ptr [[HDL]], align 8
diff --git a/llvm/test/Transforms/Coroutines/coro-split-00.ll b/llvm/test/Transforms/Coroutines/coro-split-00.ll
index b35bd720b86f9..d89938388eb8e 100644
--- a/llvm/test/Transforms/Coroutines/coro-split-00.ll
+++ b/llvm/test/Transforms/Coroutines/coro-split-00.ll
@@ -63,6 +63,13 @@ suspend:
 ; CHECK-NOT: call void @free(
 ; CHECK: ret void
 
+; CHECK-LABEL: @f.noalloc({{.*}})
+; CHECK-NOT: call ptr @malloc
+; CHECK: call void @print(i32 0)
+; CHECK-NOT: call void @print(i32 1)
+; CHECK-NOT: call void @free(
+; CHECK: ret ptr %{{.*}}
+
 declare ptr @llvm.coro.free(token, ptr)
 declare i32 @llvm.coro.size.i32()
 declare i8  @llvm.coro.suspend(token, i1)
diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
new file mode 100644
index 0000000000000..9de91a75bc204
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
@@ -0,0 +1,77 @@
+; Test that coro-annotation-elide elides calls to coroutines annotated "coro_must_elide".
+; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s
+
+%struct.Task = type { ptr }
+
+declare void @print(i32) nounwind
+
+; resume part of the coroutine
+define fastcc void @callee.resume(ptr dereferenceable(1)) {
+  tail call void @print(i32 0)
+  ret void
+}
+
+; destroy part of the coroutine
+define fastcc void @callee.destroy(ptr) {
+  tail call void @print(i32 1)
+  ret void
+}
+
+; cleanup part of the coroutine
+define fastcc void @callee.cleanup(ptr) {
+  tail call void @print(i32 2)
+  ret void
+}
+
+@callee.resumers = internal constant [3 x ptr] [
+  ptr @callee.resume, ptr @callee.destroy, ptr @callee.cleanup]
+
+declare void @alloc(i1) nounwind
+
+; CHECK-LABEL: define ptr @callee
+define ptr @callee(i8 %arg) {
+entry:
+  %task = alloca %struct.Task, align 8
+  %id = call token @llvm.coro.id(i32 0, ptr null,
+                          ptr @callee,
+                          ptr @callee.resumers)
+  %alloc = call i1 @llvm.coro.alloc(token %id)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+  store ptr %hdl, ptr %task
+  ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @callee.noalloc
+define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
+ entry:
+  %task = alloca %struct.Task, align 8
+  %id = call token @llvm.coro.id(i32 0, ptr null,
+                          ptr @callee,
+                          ptr @callee.resumers)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+  store ptr %hdl, ptr %task
+  ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @caller()
+; Function Attrs: presplitcoroutine
+define ptr @caller() #0 {
+entry:
+  %task = call ptr @callee(i8 0), !annotation !1
+  ret ptr %task
+
+  ; CHECK: %[[ALLOCA:.+]] = alloca [32 x i8], align 8
+  ; CHECK-NEXT: %[[FRAME:.+]] = bitcast ptr %[[ALLOCA]] to ptr
+  ; CHECK-NEXT: %[[TASK:.+]] = call ptr @callee.noalloc(i8 0, ptr %[[FRAME]])
+  ; CHECK-NEXT: ret ptr %[[TASK]]
+}
+
+attributes #0 = { presplitcoroutine }
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare ptr @llvm.coro.begin(token, ptr)
+declare ptr @llvm.coro.frame()
+declare ptr @llvm.coro.subfn.addr(ptr, i8)
+declare i1 @llvm.coro.alloc(token)
+
+!1 = !{!"coro_must_elide"}



More information about the llvm-commits mailing list