[llvm] c4952e5 - [nfc][ctx_prof] Efficient profile traversal and update (#110052)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 08:09:14 PDT 2024
Author: Mircea Trofin
Date: 2024-09-27T08:09:10-07:00
New Revision: c4952e513f845a0e290d22ed1131d1b6438bb65b
URL: https://github.com/llvm/llvm-project/commit/c4952e513f845a0e290d22ed1131d1b6438bb65b
DIFF: https://github.com/llvm/llvm-project/commit/c4952e513f845a0e290d22ed1131d1b6438bb65b.diff
LOG: [nfc][ctx_prof] Efficient profile traversal and update (#110052)
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted.
Added:
Modified:
llvm/include/llvm/Analysis/CtxProfAnalysis.h
llvm/include/llvm/ProfileData/PGOCtxProfReader.h
llvm/lib/Analysis/CtxProfAnalysis.cpp
llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
llvm/lib/Transforms/Utils/InlineFunction.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 0a9543f037eb58..be040d5eca5f31 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -35,7 +35,7 @@ class PGOContextualProfile {
uint32_t NextCounterIndex = 0;
uint32_t NextCallsiteIndex = 0;
const std::string Name;
-
+ PGOCtxProfContext Index;
FunctionInfo(StringRef Name) : Name(Name) {}
};
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
@@ -50,6 +50,8 @@ class PGOContextualProfile {
// its state piecemeal.
PGOContextualProfile() = default;
+ void initIndex();
+
public:
PGOContextualProfile(const PGOContextualProfile &) = delete;
PGOContextualProfile(PGOContextualProfile &&) = default;
@@ -94,7 +96,7 @@ class PGOContextualProfile {
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
using Visitor = function_ref<void(PGOCtxProfContext &)>;
- void update(Visitor, const Function *F = nullptr);
+ void update(Visitor, const Function &F);
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index a00c21ddc7d7a1..d358041e3a001f 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -22,6 +22,58 @@
#include <map>
namespace llvm {
+class PGOContextualProfile;
+class PGOCtxProfContext;
+
+namespace internal {
+// When we traverse the contextual profile, we typically want to visit contexts
+// pertaining to a specific function. To avoid traversing the whole tree, we
+// want to keep a per-function list - which will be in preorder - of that
+// function's contexts. This happens in PGOContextualProfile. For memory use
+// efficiency, we want to make PGOCtxProfContext an intrusive double-linked list
+// node. We need to handle the cases where PGOCtxProfContext nodes are moved and
+// deleted: in both cases, we need to update the index (==list). We can do that
+// directly from the node in the list, without knowing who the "parent" of the
+// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant
+// to be an implementation detail of PGOCtxProfContext, and the only reason it's
+// factored out is to avoid implementing move semantics for all its members.
+class IndexNode {
+ // This class' members are intentionally private - it's a convenience
+ // implementation detail.
+ friend class ::llvm::PGOCtxProfContext;
+ friend class ::llvm::PGOContextualProfile;
+
+ IndexNode *Previous = nullptr;
+ IndexNode *Next = nullptr;
+
+ ~IndexNode() {
+ if (Next)
+ Next->Previous = Previous;
+ if (Previous)
+ Previous->Next = Next;
+ }
+
+ IndexNode(const IndexNode &Other) = delete;
+
+ IndexNode(IndexNode &&Other) {
+ // Copy the neighbor info
+ Next = Other.Next;
+ Previous = Other.Previous;
+
+ // Update the neighbors to point to this object
+ if (Other.Next)
+ Other.Next->Previous = this;
+ if (Other.Previous)
+ Other.Previous->Next = this;
+
+ // Make sure the dtor is a noop
+ Other.Next = nullptr;
+ Other.Previous = nullptr;
+ }
+ IndexNode() = default;
+};
+} // namespace internal
+
/// A node (context) in the loaded contextual profile, suitable for mutation
/// during IPO passes. We generally expect a fraction of counters and
/// callsites to be populated. We continue to model counters as vectors, but
@@ -29,13 +81,15 @@ namespace llvm {
/// there is a small number of indirect targets (usually, 1 for direct calls);
/// but potentially a large number of callsites, and, as inlining progresses,
/// the callsite count of a caller will grow.
-class PGOCtxProfContext final {
+class PGOCtxProfContext final : public internal::IndexNode {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;
private:
friend class PGOCtxProfileReader;
+ friend class PGOContextualProfile;
+
GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
CallsiteMapTy Callsites;
@@ -47,11 +101,15 @@ class PGOCtxProfContext final {
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
SmallVectorImpl<uint64_t> &&Counters);
+ // Create a bogus context object, used for anchoring the index double linked
+ // list - see IndexNode
+ PGOCtxProfContext() = default;
+
public:
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
PGOCtxProfContext(PGOCtxProfContext &&) = default;
- PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
+ PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;
GlobalValue::GUID guid() const { return GUID; }
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index e03b5dfd538194..825e517cd09f52 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -184,6 +184,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
// If we made it this far, the Result is valid - which we mark by setting
// .Profiles.
Result.Profiles = std::move(*MaybeCtx);
+ Result.initIndex();
return Result;
}
@@ -265,11 +266,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
- function_ref<void(ProfTy &)> Visitor,
- GlobalValue::GUID Match = 0) {
+ function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
- if (!Match || Ctx.guid() == Match)
- Visitor(Ctx);
+ Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
@@ -278,16 +277,44 @@ static void preorderVisit(ProfilesTy &Profiles,
Traverser(P);
}
-void PGOContextualProfile::update(Visitor V, const Function *F) {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
+void PGOContextualProfile::initIndex() {
+ // Initialize the head of the index list for each function. We don't need it
+ // after this point.
+ DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
+ for (auto &[Guid, FI] : FuncInfo)
+ InsertionPoints[Guid] = &FI.Index;
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
- *Profiles, V, G);
+ *Profiles, [&](PGOCtxProfContext &Ctx) {
+ auto InsertIt = InsertionPoints.find(Ctx.guid());
+ if (InsertIt == InsertionPoints.end())
+ return;
+ // Insert at the end of the list. Since we traverse in preorder, it
+ // means that when we iterate the list from the beginning, we'd
+ // encounter the contexts in the order we would have, should we have
+ // performed a full preorder traversal.
+ InsertIt->second->Next = &Ctx;
+ Ctx.Previous = InsertIt->second;
+ InsertIt->second = &Ctx;
+ });
+}
+
+void PGOContextualProfile::update(Visitor V, const Function &F) {
+ assert(isFunctionKnown(F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(F);
+ for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*reinterpret_cast<PGOCtxProfContext *>(Node));
}
void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
- preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
- const PGOCtxProfContext>(*Profiles, V, G);
+ if (!F)
+ return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+ const PGOCtxProfContext>(*Profiles, V);
+ assert(isFunctionKnown(*F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(*F);
+ for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
}
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 3d2fa226ff15b9..4ead5cdcf29c01 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
Ctx.counters()[IndirectID] = IndirectCount;
};
- CtxProf.update(ProfileUpdater, &Caller);
+ CtxProf.update(ProfileUpdater, Caller);
return &DirectCall;
}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 25a01558165da0..671b0d0822a5d9 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
assert(Deleted);
(void)Deleted;
};
- CtxProf.update(Updater, &Caller);
+ CtxProf.update(Updater, Caller);
return Ret;
}
More information about the llvm-commits
mailing list