[compiler-rt] [compiler-rt][ctx_profile] Add the instrumented contextual profiling APIs (PR #89838)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 09:59:26 PDT 2024


================
@@ -51,5 +53,155 @@ class Arena final {
   const uint64_t Size;
 };
 
+/// The contextual profile is a directed tree where each node has one parent. A
+/// node (ContextNode) corresponds to a function activation. The root of the
+/// tree is at a function that was marked as entrypoint to the compiler. A node
+/// stores counter values for edges and a vector of subcontexts. These are the
+/// contexts of callees. The index in the subcontext vector corresponds to the
+/// index of the callsite (as was instrumented via llvm.instrprof.callsite). At
+/// that index we find a linked list, potentially empty, of ContextNodes. Direct
+/// calls will have 0 or 1 values in the linked list, but indirect callsites may
+/// have more.
+///
+/// The ContextNode has a fixed sized header describing it - the GUID of the
+/// function, the size of the counter and callsite vectors. It is also an
+/// (intrusive) linked list for the purposes of the indirect call case above.
+///
+/// Allocation is expected to happen on an Arena.
+///
+/// The structure of the ContextNode is known to LLVM, because LLVM needs to:
+///   (1) increment countes, and
+///   (2) form a GEP for the position in the subcontext list of a callsite
+/// This means changes to LLVM contextual profile lowering and changes here
+/// must be coupled.
+/// Note: the header content isn't interesting to LLVM (other than its size)
+class ContextNode final {
+  const GUID Guid;
+  ContextNode *const Next;
+  const uint32_t NrCounters;
+  const uint32_t NrCallsites;
+
+public:
+  ContextNode(GUID Guid, uint32_t NrCounters, uint32_t NrCallsites,
+              ContextNode *Next = nullptr)
+      : Guid(Guid), Next(Next), NrCounters(NrCounters),
+        NrCallsites(NrCallsites) {}
+  static inline ContextNode *alloc(char *Place, GUID Guid, uint32_t NrCounters,
+                                   uint32_t NrCallsites,
+                                   ContextNode *Next = nullptr);
+
+  static inline size_t getAllocSize(uint32_t NrCounters, uint32_t NrCallsites) {
+    return sizeof(ContextNode) + sizeof(uint64_t) * NrCounters +
+           sizeof(ContextNode *) * NrCallsites;
+  }
+
+  uint64_t *counters() {
+    ContextNode *addr_after = &(this[1]);
+    return reinterpret_cast<uint64_t *>(reinterpret_cast<char *>(addr_after));
+  }
+
+  uint32_t counters_size() const { return NrCounters; }
+  uint32_t callsites_size() const { return NrCallsites; }
+
+  const uint64_t *counters() const {
+    return const_cast<ContextNode *>(this)->counters();
+  }
+
+  ContextNode **subContexts() {
+    return reinterpret_cast<ContextNode **>(&(counters()[NrCounters]));
+  }
+
+  ContextNode *const *subContexts() const {
+    return const_cast<ContextNode *>(this)->subContexts();
+  }
+
+  GUID guid() const { return Guid; }
+  ContextNode *next() { return Next; }
+
+  size_t size() const { return getAllocSize(NrCounters, NrCallsites); }
+
+  void reset();
+
+  // since we go through the runtime to get a context back to LLVM, in the entry
+  // basic block, might as well handle incrementing the entry basic block
+  // counter.
+  void onEntry() { ++counters()[0]; }
+
+  uint64_t entrycount() const { return counters()[0]; }
+};
+
+/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
+/// with allocating and zero-initializing the global value for it.
+struct ContextRoot {
+  ContextNode *FirstNode = nullptr;
+  Arena *FirstMemBlock = nullptr;
+  Arena *CurrentMem = nullptr;
+  // This is init-ed by the static zero initializer in LLVM.
+  ::__sanitizer::StaticSpinMutex Taken;
+
+  // Avoid surprises due to (unlikely) StaticSpinMutex changes.
+  static_assert(sizeof(Taken) == 1);
+};
+
+/// This API is exposed for testing. See the APIs below about the contract with
+/// LLVM.
+inline bool isScratch(const ContextNode *Ctx) {
+  return (reinterpret_cast<uint64_t>(Ctx) & 1);
+}
+
 } // namespace __ctx_profile
+
+extern "C" {
+
+// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
+// position 0 is used when the current context isn't scratch, 1 when it is. They
+// are volatile because of signal handlers - we mean to specifically control
+// when the data is loaded.
+//
+/// TLS where LLVM stores the pointer of the called value, as part of lowering a
+/// llvm.instrprof.callsite
+extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
+/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
+/// corresponds to the callsite being lowered.
+extern __thread __ctx_profile::ContextNode *
+    *volatile __llvm_ctx_profile_callsite[2];
+
+// __llvm_ctx_profile_current_context_root is exposed for unit testing,
+// othwerise it's only used internally by compiler-rt/ctx_profile.
+extern __thread __ctx_profile::ContextRoot
+    *volatile __llvm_ctx_profile_current_context_root;
+
+/// called by LLVM in the entry BB of a "entry point" function. The returned
+/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
+__ctx_profile::ContextNode *
+__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
+                                 __ctx_profile::GUID Guid, uint32_t Counters,
+                                 uint32_t Callsites);
+
+/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
+/// entry point function.
+void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
+
+/// called for any other function than entry points, in the entry BB of such
+/// function. Same consideration about LSB of returned value as .._start_context
+__ctx_profile::ContextNode *
+__llvm_ctx_profile_get_context(void *Callee, __ctx_profile::GUID Guid,
----------------
mtrofin wrote:

a function address. So `void *` is appropriate.

https://github.com/llvm/llvm-project/pull/89838


More information about the llvm-commits mailing list