[llvm] [Coroutines] Inline the `.noalloc` ramp function marked coro_safe_elide (PR #114004)
Yuxuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 11:04:59 PDT 2024
https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/114004
>From d2b5f8acde3af92b19cc4b72da8cbcac3c8bf285 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen <ych at meta.com>
Date: Mon, 28 Oct 2024 20:28:54 -0700
Subject: [PATCH 1/3] Revert "[LLVM][Coroutines] Switch CoroAnnotationElidePass
to a FunctionPass (#107897)"
This reverts commit 761bf333e378b52614cf36cd5db2837d5e4e0ae4.
---
.../Coroutines/CoroAnnotationElide.h | 10 +-
llvm/lib/Passes/PassBuilderPipelines.cpp | 6 +-
llvm/lib/Passes/PassRegistry.def | 2 +-
.../Coroutines/CoroAnnotationElide.cpp | 122 ++++++++++--------
4 files changed, 79 insertions(+), 61 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
index 986a5dbd1ed0fe..352c9e14526697 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
@@ -17,14 +17,18 @@
#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
-class Function;
-
struct CoroAnnotationElidePass : PassInfoMixin<CoroAnnotationElidePass> {
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ CoroAnnotationElidePass() {}
+
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
static bool isRequired() { return false; }
};
} // end namespace llvm
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 488554c84c1c43..aa33b1acc1580c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -980,8 +980,7 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
- MainCGPipeline.addPass(
- createCGSCCToFunctionPassAdaptor(CoroAnnotationElidePass()));
+ MainCGPipeline.addPass(CoroAnnotationElidePass());
}
// Make sure we don't affect potential future NoRerun CGSCC adaptors.
@@ -1032,7 +1031,8 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
CoroSplitPass(Level != OptimizationLevel::O0)));
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroAnnotationElidePass()));
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(CoroAnnotationElidePass()));
}
return MPM;
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 017ae311c55eb4..4994005560472f 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -244,6 +244,7 @@ CGSCC_PASS("attributor-light-cgscc", AttributorLightCGSCCPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
+CGSCC_PASS("coro-annotation-elide", CoroAnnotationElidePass())
#undef CGSCC_PASS
#ifndef CGSCC_PASS_WITH_PARAMS
@@ -344,7 +345,6 @@ FUNCTION_PASS("complex-deinterleaving", ComplexDeinterleavingPass(TM))
FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
-FUNCTION_PASS("coro-annotation-elide", CoroAnnotationElidePass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("count-visits", CountVisitsPass())
FUNCTION_PASS("dce", DCEPass())
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index 5f19d600a983aa..7f3214c0de241b 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Analysis.h"
@@ -42,10 +43,10 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
// Create an alloca in the caller, using FrameSize and FrameAlign as the callee
// coroutine's activation frame.
static Value *allocateFrameInCaller(Function *Caller, uint64_t FrameSize,
- Align FrameAlign) {
+ Align FrameAlign) {
LLVMContext &C = Caller->getContext();
BasicBlock::iterator InsertPt =
- getFirstNonAllocaInTheEntryBlock(Caller)->getIterator();
+ getFirstNonAllocaInTheEntryBlock(Caller)->getIterator();
const DataLayout &DL = Caller->getDataLayout();
auto FrameTy = ArrayType::get(Type::getInt8Ty(C), FrameSize);
auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
@@ -59,7 +60,7 @@ static Value *allocateFrameInCaller(Function *Caller, uint64_t FrameSize,
// - Replace the old CB with a new Call or Invoke to `NewCallee`, with the
// pointer to the frame as an additional argument to NewCallee.
static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
- uint64_t FrameSize, Align FrameAlign) {
+ uint64_t FrameSize, Align FrameAlign) {
// TODO: generate the lifetime intrinsics for the new frame. This will require
// introduction of two pesudo lifetime intrinsics in the frontend around the
// `co_await` expression and convert them to real lifetime intrinsics here.
@@ -72,13 +73,13 @@ static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
if (auto *CI = dyn_cast<CallInst>(CB)) {
auto *NewCI = CallInst::Create(NewCallee->getFunctionType(), NewCallee,
- NewArgs, "", NewCBInsertPt);
+ NewArgs, "", NewCBInsertPt);
NewCI->setTailCallKind(CI->getTailCallKind());
NewCB = NewCI;
} else if (auto *II = dyn_cast<InvokeInst>(CB)) {
NewCB = InvokeInst::Create(NewCallee->getFunctionType(), NewCallee,
- II->getNormalDest(), II->getUnwindDest(),
- NewArgs, {}, "", NewCBInsertPt);
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs, {}, "", NewCBInsertPt);
} else {
llvm_unreachable("CallBase should either be Call or Invoke!");
}
@@ -88,65 +89,78 @@ static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
NewCB->setAttributes(CB->getAttributes());
NewCB->setDebugLoc(CB->getDebugLoc());
std::copy(CB->bundle_op_info_begin(), CB->bundle_op_info_end(),
- NewCB->bundle_op_info_begin());
+ NewCB->bundle_op_info_begin());
NewCB->removeFnAttr(llvm::Attribute::CoroElideSafe);
CB->replaceAllUsesWith(NewCB);
CB->eraseFromParent();
}
-PreservedAnalyses CoroAnnotationElidePass::run(Function &F,
- FunctionAnalysisManager &FAM) {
+PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
bool Changed = false;
+ CallGraphUpdater CGUpdater;
+ CGUpdater.initialize(CG, C, AM, UR);
- Function *NewCallee =
- F.getParent()->getFunction((F.getName() + ".noalloc").str());
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
- if (!NewCallee)
- return PreservedAnalyses::all();
-
- auto FramePtrArgPosition = NewCallee->arg_size() - 1;
- auto FrameSize = NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
- auto FrameAlign = NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();
-
- SmallVector<CallBase *, 4> Users;
- for (auto *U : F.users()) {
- if (auto *CB = dyn_cast<CallBase>(U)) {
- if (CB->getCalledFunction() == &F)
- Users.push_back(CB);
- }
- }
-
- auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-
- for (auto *CB : Users) {
- auto *Caller = CB->getFunction();
- if (!Caller)
+ for (LazyCallGraph::Node &N : C) {
+ Function *Callee = &N.getFunction();
+ Function *NewCallee = Callee->getParent()->getFunction(
+ (Callee->getName() + ".noalloc").str());
+ if (!NewCallee)
continue;
- bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
- bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
- if (IsCallerPresplitCoroutine && HasAttr) {
- processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);
-
- ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
- << "'" << ore::NV("callee", F.getName()) << "' elided in '"
- << ore::NV("caller", Caller->getName()) << "'";
- });
-
- FAM.invalidate(*Caller, PreservedAnalyses::none());
- Changed = true;
- } else {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "CoroAnnotationElide",
- Caller)
- << "'" << ore::NV("callee", F.getName()) << "' not elided in '"
- << ore::NV("caller", Caller->getName()) << "' (caller_presplit="
- << ore::NV("caller_presplit", IsCallerPresplitCoroutine)
- << ", elide_safe_attr=" << ore::NV("elide_safe_attr", HasAttr)
- << ")";
- });
+ SmallVector<CallBase *, 4> Users;
+ for (auto *U : Callee->users()) {
+ if (auto *CB = dyn_cast<CallBase>(U)) {
+ if (CB->getCalledFunction() == Callee)
+ Users.push_back(CB);
+ }
+ }
+ auto FramePtrArgPosition = NewCallee->arg_size() - 1;
+ auto FrameSize = NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
+ auto FrameAlign = NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();
+
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(*Callee);
+
+ for (auto *CB : Users) {
+ auto *Caller = CB->getFunction();
+ if (!Caller)
+ continue;
+
+ bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
+ bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
+ if (IsCallerPresplitCoroutine && HasAttr) {
+ auto *CallerN = CG.lookup(*Caller);
+ auto *CallerC = CG.lookupSCC(*CallerN);
+ processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);
+
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
+ << "'" << ore::NV("callee", Callee->getName()) << "' elided in '"
+ << ore::NV("caller", Caller->getName()) << "'";
+ });
+
+ FAM.invalidate(*Caller, PreservedAnalyses::none());
+ Changed = true;
+ updateCGAndAnalysisManagerForCGSCCPass(CG, *CallerC, *CallerN, AM, UR,
+ FAM);
+
+ } else {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CoroAnnotationElide",
+ Caller)
+ << "'" << ore::NV("callee", Callee->getName()) << "' not elided in '"
+ << ore::NV("caller", Caller->getName()) << "' (caller_presplit="
+ << ore::NV("caller_presplit", IsCallerPresplitCoroutine)
+ << ", elide_safe_attr=" << ore::NV("elide_safe_attr", HasAttr)
+ << ")";
+ });
+ }
}
}
>From 3bbd280015ba4bdd3dbba171ebd706e89cfdd5a4 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen <ych at meta.com>
Date: Mon, 28 Oct 2024 20:38:40 -0700
Subject: [PATCH 2/3] inline new call to caller
---
.../lib/Transforms/Coroutines/CoroAnnotationElide.cpp | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index 7f3214c0de241b..9e22d96387fc31 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <cassert>
@@ -93,7 +94,15 @@ static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,
NewCB->removeFnAttr(llvm::Attribute::CoroElideSafe);
CB->replaceAllUsesWith(NewCB);
- CB->eraseFromParent();
+
+ InlineFunctionInfo IFI;
+ InlineResult IR = InlineFunction(*NewCB, IFI);
+ if (IR.isSuccess()) {
+ CB->eraseFromParent();
+ } else {
+ NewCB->replaceAllUsesWith(CB);
+ NewCB->eraseFromParent();
+ }
}
PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
>From e48430eec013803c24d05f5da5fb64cc98208bad Mon Sep 17 00:00:00 2001
From: Yuxuan Chen <ych at meta.com>
Date: Fri, 1 Nov 2024 11:04:19 -0700
Subject: [PATCH 3/3] Fix existing test failure due to inlining
---
.../Transforms/Coroutines/coro-transform-must-elide.ll | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll b/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
index a4e575f6c03816..2363ba6b8d1a95 100644
--- a/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
+++ b/llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
@@ -59,9 +59,11 @@ define ptr @caller() #0 {
entry:
%task = call ptr @callee(i8 0) #1
ret ptr %task
-
- ; CHECK: %[[FRAME:.+]] = alloca [32 x i8], align 8
- ; CHECK-NEXT: %[[TASK:.+]] = call ptr @callee.noalloc(i8 0, ptr %[[FRAME]])
+ ; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8
+ ; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8
+ ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TASK]])
+ ; CHECK-NEXT: call token @llvm.coro.id(i32 0, ptr null, ptr @callee, ptr @callee.resumers)
+ ; CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TASK]])
; CHECK-NEXT: ret ptr %[[TASK]]
}
More information about the llvm-commits mailing list