[compiler-rt] [compiler-rt][ctx_profile] Add the instrumented contextual profiling APIs (PR #89838)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 08:46:44 PDT 2024
================
@@ -38,3 +128,177 @@ void Arena::freeArenaList(Arena *&A) {
}
A = nullptr;
}
+
+inline ContextNode *ContextNode::alloc(char *Place, GUID Guid,
+                                       uint32_t NrCounters,
+                                       uint32_t NrCallsites,
+                                       ContextNode *Next) {
+  assert(reinterpret_cast<uint64_t>(Place) % sizeof(void *) == 0);
+  return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
+}
+
+void ContextNode::reset() {
+  for (uint32_t I = 0; I < NrCounters; ++I)
+    counters()[I] = 0;
+  for (uint32_t I = 0; I < NrCallsites; ++I)
+    for (auto *Next = subContexts()[I]; Next; Next = Next->Next)
+      Next->reset();
+}
+
+// If this is the first time we hit a callsite with this particular callee
+// (identified by Guid), we need to allocate.
+ContextNode *getCallsiteSlow(uint64_t Guid, ContextNode **InsertionPoint,
+                             uint32_t NrCounters, uint32_t NrCallsites) {
+  auto AllocSize = ContextNode::getAllocSize(NrCounters, NrCallsites);
+  auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
+  char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
+  if (!AllocPlace) {
+    // if we failed to allocate on the current arena, allocate a new arena and
+    // place it on __llvm_ctx_profile_current_context_root->CurrentMem, so we
+    // find it from now on whenever we need to getCallsiteSlow. Note that
+    // allocateNewArena will link the allocated memory into the list of Arenas.
+    __llvm_ctx_profile_current_context_root->CurrentMem = Mem =
+        Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
+    // Retry the bump allocation on the fresh arena.
+    AllocPlace = Mem->tryBumpAllocate(AllocSize);
+  }
+  auto *Ret = ContextNode::alloc(AllocPlace, Guid, NrCounters, NrCallsites,
+                                 *InsertionPoint);
+  *InsertionPoint = Ret;
+  return Ret;
+}
+
+ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
+                                            uint32_t NrCounters,
+                                            uint32_t NrCallsites) {
+  // fast "out" if we're not even doing contextual collection.
+  if (!__llvm_ctx_profile_current_context_root)
+    return TheScratchContext;
+
+  // also fast "out" if the caller is scratch.
+  auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
+  if (!CallsiteContext || isScratch(*CallsiteContext))
+    return TheScratchContext;
+
+  // if the callee isn't the expected one, return scratch.
+  // Signal handler(s) could have been invoked at any point in the execution.
+  // Should that have happened, and had it (the handler) been built with
+  // instrumentation, its __llvm_ctx_profile_get_context would have failed
+  // here. Its sub call graph would have then populated
+  // __llvm_ctx_profile_{expected_callee | callsite} at index 1.
+  // The normal call graph may be impacted in that, if the signal handler
+  // happened somewhere before we read the TLS here, we'd see the TLS reset and
+  // we'd also fail here. That would just mean we would lose counter values for
+  // the normal subgraph, this time around. That should be very unlikely, but
+  // if it happens too frequently, we should be able to detect discrepancies in
+  // entry counts (caller-callee). At the moment, the design goes on the
+  // assumption that this is so infrequent that it's not worth doing more for
+  // that case.
+  auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
+  if (ExpectedCallee != Callee)
+    return TheScratchContext;
+
+  auto *Callsite = *CallsiteContext;
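+  // Walk the sub-contexts already attached to this callsite (linked through
+  // their next pointers) looking for the one whose Guid matches this callee;
+  // if none exists yet, getCallsiteSlow allocates it.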
+  while (Callsite && Callsite->guid() != Guid) {
----------------
teresajohnson wrote:
What is this loop doing?
https://github.com/llvm/llvm-project/pull/89838
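
The quoted loop appears to walk the singly linked list of ContextNode objects already attached to this callsite slot, stopping at the first node whose guid() matches the callee; getCallsiteSlow (earlier in the hunk) covers the case where no such node exists yet. Below is a minimal, self-contained sketch of that find-or-create pattern. Node and findOrCreate are simplified stand-ins for illustration, not the PR's actual types, and malloc stands in for the arena bump allocator.

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Simplified stand-in for the PR's ContextNode: just a GUID key and a link to
// the next node attached to the same callsite. Counters and callsite arrays
// are omitted.
using GUID = uint64_t;

struct Node {
  GUID Guid;
  Node *Next;
};

// Walk the per-callsite list for a node keyed by Guid; if none exists yet,
// allocate one and prepend it, the way getCallsiteSlow prepends at the
// insertion point.
Node *findOrCreate(GUID Guid, Node **Head) {
  Node *N = *Head;
  while (N && N->Guid != Guid) // same shape as the loop in question
    N = N->Next;
  if (N)
    return N;
  Node *Created = static_cast<Node *>(std::malloc(sizeof(Node)));
  Created->Guid = Guid;
  Created->Next = *Head;
  *Head = Created;
  return Created;
}

int main() {
  Node *Head = nullptr;
  Node *A = findOrCreate(42, &Head);
  Node *B = findOrCreate(42, &Head);
  assert(A == B); // the second lookup finds the existing node
  std::free(A);
  return 0;
}

The slow path in the PR differs in that the node is placement-new'ed into arena memory, which is what the getCallsiteSlow portion of the hunk shows.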
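
The other pattern the hunk leans on is getCallsiteSlow's allocation discipline: try a bump allocation on the current arena and, when it is exhausted, chain a fresh arena and retry. A rough sketch of that fallback, using a hypothetical FixedArena type rather than the PR's Arena class (which sizes new arenas based on the request):

#include <cstddef>

// Hypothetical fixed-capacity arena, for illustration only.
struct FixedArena {
  static constexpr size_t Capacity = 1 << 16;
  FixedArena *Next = nullptr;
  size_t Used = 0;
  alignas(alignof(void *)) char Bytes[Capacity];

  // Return nullptr when the request does not fit, like tryBumpAllocate.
  char *tryBumpAllocate(size_t N) {
    // Keep returned pointers pointer-aligned, matching the assert in
    // ContextNode::alloc.
    size_t Aligned = (Used + alignof(void *) - 1) & ~(alignof(void *) - 1);
    if (Aligned + N > Capacity)
      return nullptr;
    Used = Aligned + N;
    return Bytes + Aligned;
  }
};

// Bump-allocate from the current arena, growing the chain and retrying on
// exhaustion. Assumes N fits in an empty arena; the real code sizes the new
// arena from the request via getArenaAllocSize.
char *allocateWithFallback(FixedArena *&Current, size_t N) {
  char *P = Current->tryBumpAllocate(N);
  if (!P) {
    auto *Fresh = new FixedArena();
    Current->Next = Fresh;
    Current = Fresh;
    P = Current->tryBumpAllocate(N);
  }
  return P;
}

int main() {
  FixedArena *Arena = new FixedArena();
  char *P = allocateWithFallback(Arena, 128);
  return P != nullptr ? 0 : 1;
}

In the PR, the freshly allocated arena is additionally linked into the context root's arena list (per the comment, allocateNewArena takes care of that), so it can be found and freed later.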