[compiler-rt] [ctxprof] Auto root detection: trie for stack samples (PR #133106)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 08:32:51 PDT 2025


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/133106

>From b5f28ce2a36bc5ea22b6dffad21be751749debd5 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 24 Mar 2025 12:00:49 -0700
Subject: [PATCH] RuntimeCallsiteTrie

---
 compiler-rt/lib/ctx_profile/CMakeLists.txt    |   2 +
 .../lib/ctx_profile/RootAutoDetector.cpp      |  90 +++++++++++++
 .../lib/ctx_profile/RootAutoDetector.h        |  68 ++++++++++
 .../lib/ctx_profile/tests/CMakeLists.txt      |   4 +-
 .../tests/RootAutoDetectorTest.cpp            | 123 ++++++++++++++++++
 5 files changed, 286 insertions(+), 1 deletion(-)
 create mode 100644 compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
 create mode 100644 compiler-rt/lib/ctx_profile/RootAutoDetector.h
 create mode 100644 compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp

diff --git a/compiler-rt/lib/ctx_profile/CMakeLists.txt b/compiler-rt/lib/ctx_profile/CMakeLists.txt
index ce491fc7e8bf0..bb606449c61b1 100644
--- a/compiler-rt/lib/ctx_profile/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/CMakeLists.txt
@@ -2,11 +2,13 @@ add_compiler_rt_component(ctx_profile)
 
 set(CTX_PROFILE_SOURCES
   CtxInstrProfiling.cpp
+  RootAutoDetector.cpp
   )
 
 set(CTX_PROFILE_HEADERS
   CtxInstrContextNode.h
   CtxInstrProfiling.h
+  RootAutoDetector.h
   )
 
 include_directories(..)
diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
new file mode 100644
index 0000000000000..7daa8f31e16ea
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
@@ -0,0 +1,90 @@
+//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RootAutoDetector.h"
+#include "CtxInstrProfiling.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include <assert.h>
+#include <dlfcn.h>
+#include <pthread.h>
+
+using namespace __ctx_profile;
+
+uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
+  // Symbol lookup requires --linkopt=-Wl,--export-dynamic; 0 means unresolved.
+  Dl_info Info;
+  const void *PC = reinterpret_cast<const void *>(CallsiteAddress);
+  const bool Resolved = dladdr(PC, &Info) != 0;
+  return Resolved ? reinterpret_cast<uptr>(Info.dli_saddr) : 0;
+}
+
+void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) {
+  auto *Current = &T;
+  ++Current->Count; // The root node sees every sample.
+  // The stack is backwards - the first callsite is at the top.
+  for (int I = ST.size - 1; I >= 0; --I) {
+    auto [Iter, _] = Current->Children.insert({ST.trace[I], Trie(ST.trace[I])});
+    Current = &Iter->second;
+    ++Current->Count; // Count the sample in each frame, the last one included.
+  }
+}
+
+DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
+  // Assumption: the thread runs a message pump, i.e. a loop that is (for all
+  // practical considerations) infinite and fetches work from a queue. Roots
+  // are the functions that the pump calls. They have to return - otherwise
+  // the pump doesn't work. Walking the trie from its start, we report as
+  // roots the functions at the first depth where 2 or more distinct
+  // functions show up.
+  //
+  // The trie is keyed by callsite, not by function, so sibling nodes may
+  // actually belong to the same function.
+  //
+  // For example, writing function(callsite):
+  // f1(csf1_1) -> f2(csf2_1) -> f3
+  //            -> f2(csf2_2) -> f4
+  //
+  // is stored in the trie as:
+  // csf1_1 -> csf2_1 -> f3
+  //        -> csf2_2 -> f4
+  //
+  // Control flow demonstrably returns to f2, but nothing tells us it ever
+  // returns to f1 - f2 could be the message pump.
+  //
+  // Rather than converting the callsite tree into a function tree, it is
+  // cheaper to go depth by depth and count the distinct functions at each
+  // one. The first depth with more than 1 function holds the potential
+  // roots, and everything above it is considered a non-root. If no depth
+  // qualifies, the result is empty.
+  Set<const Trie *> Level;
+  Level.insert({&T, {}});
+  DenseMap<uptr, uint64_t> Roots;
+
+  while (!Level.empty()) {
+    Set<const Trie *> NextLevel;
+    DenseMap<uptr, uint64_t> FctSamples;
+    Level.forEach([&](auto &Entry) {
+      const Trie *Node = Entry.first;
+      FctSamples[getFctStartAddr(Node->CallsiteAddress)] += Node->Count;
+      Node->Children.forEach([&](auto &Child) {
+        NextLevel.insert({&Child.second, {}});
+        return true;
+      });
+      return true;
+    });
+    if (FctSamples.size() > 1) {
+      Roots.swap(FctSamples);
+      break;
+    }
+    Level.swap(NextLevel);
+  }
+  return Roots;
+}
diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.h b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
new file mode 100644
index 0000000000000..ab51a342d3617
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
@@ -0,0 +1,68 @@
+/*===- RootAutoDetector.h - auto-detect roots for ctxprof -----------------===*\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+
+#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_
+#define CTX_PROFILE_ROOTAUTODETECTOR_H_
+
+#include "sanitizer_common/sanitizer_dense_map.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include <pthread.h>
+#include <sanitizer/common_interface_defs.h>
+
+using namespace __asan;
+using namespace __sanitizer;
+
+namespace __ctx_profile {
+
+template <typename T> using Set = DenseMap<T, bool>;
+
+/// A trie of callsites. Each node stands for the address of a callsite within
+/// a function activation; its children are the callsites in the activation
+/// that the parent's callsite created.
+class Trie final {
+  friend class PerThreadCallsiteTrie;
+  const uptr CallsiteAddress;    // Address of the callsite this node stands for.
+  uint64_t Count = 0;            // Sample counter, bumped by insertStack.
+  DenseMap<uptr, Trie> Children; // Keyed by the child's callsite address.
+
+public:
+  Trie(uptr CallsiteAddress) : CallsiteAddress(CallsiteAddress) {}
+
+  const DenseMap<uptr, Trie> &children() const { return Children; }
+  uint64_t count() const { return Count; }
+  uptr address() const { return CallsiteAddress; }
+};
+
+/// Collects the stack traces sampled on one thread into a callsite trie.
+/// Nothing enforces the one-thread part; determineRoots just assumes it.
+class PerThreadCallsiteTrie {
+  Trie T;
+
+protected:
+  /// Return the runtime start address of the function containing the call at
+  /// runtime address CallsiteAddress. May be overridden for easy testing.
+  virtual uptr getFctStartAddr(uptr CallsiteAddress) const;
+
+public:
+  PerThreadCallsiteTrie() : T(0) {}
+  PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default;
+  PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete;
+  virtual ~PerThreadCallsiteTrie() = default;
+
+  /// Record one sampled stack, adding trie nodes for callsites not seen yet.
+  void insertStack(const StackTrace &ST);
+
+  /// Return the runtime address of root functions, as determined for this
+  /// thread, together with the number of samples that included them.
+  DenseMap<uptr, uint64_t> determineRoots() const;
+
+  const Trie &start() const { return T; }
+};
+} // namespace __ctx_profile
+#endif
diff --git a/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt b/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
index 012fd7aff7862..0954d5cd34487 100644
--- a/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
@@ -22,10 +22,12 @@ append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros CTX_PR
 file(GLOB CTX_PROFILE_HEADERS ../*.h)
 
 set(CTX_PROFILE_SOURCES
-  ../CtxInstrProfiling.cpp)
+  ../CtxInstrProfiling.cpp
+  ../RootAutoDetector.cpp)
 
 set(CTX_PROFILE_UNITTESTS
   CtxInstrProfilingTest.cpp
+  RootAutoDetectorTest.cpp
   driver.cpp)
 
 include_directories(../../../include)
diff --git a/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp b/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp
new file mode 100644
index 0000000000000..3e955d33e994e
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp
@@ -0,0 +1,123 @@
+#include "../RootAutoDetector.h"
+#include "sanitizer_common/sanitizer_array_ref.h"
+#include "gtest/gtest.h"
+
+using namespace __ctx_profile;
+
+class MockCallsiteTree final : public PerThreadCallsiteTrie {
+  // Fake resolver: round the address down to the nearest multiple of 100.
+  uptr getFctStartAddr(uptr CallsiteAddress) const override {
+    return CallsiteAddress - CallsiteAddress % 100;
+  }
+};
+
+class Marker {
+  // Value: a node address. Split: a node with Value children. End: a leaf.
+  enum class Kind { End, Value, Split };
+  const uptr Value;
+  const Kind K;
+  Marker(uptr V, Kind S) : Value(V), K(S) {}
+
+public:
+  // Implicit: the tests list node addresses as plain integers.
+  Marker(uptr V) : Marker(V, Kind::Value) {}
+  static Marker term() { return Marker(0, Kind::End); }
+  static Marker split(uptr V) { return Marker(V, Kind::Split); }
+  bool isVal() const { return K == Kind::Value; }
+  bool isTerm() const { return K == Kind::End; }
+  bool isSplit() const { return K == Kind::Split; }
+
+  bool operator==(const Marker &M) const {
+    return K == M.K && Value == M.Value;
+  }
+};
+
+void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) {
+  ASSERT_FALSE(Preorder.empty());   // Fewer markers than trie elements.
+  ASSERT_EQ(Preorder[0], M);        // The next expected marker must be M.
+  Preorder = Preorder.drop_front(); // Consume it; the caller sees the rest.
+}
+
+void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) {
+  // Expected order: the node, then a term (leaf) or, when it has more than one
+  // child, a split carrying the child count, then each child's subtree.
+  popAndCheck(Preorder, T.address());
+  const auto &Kids = T.children();
+  if (Kids.size() == 0) {
+    popAndCheck(Preorder, Marker::term());
+    return;
+  }
+  if (Kids.size() > 1)
+    popAndCheck(Preorder, Marker::split(Kids.size()));
+  Kids.forEach([&](const auto &Child) {
+    checkSameImpl(Child.second, Preorder);
+    return true;
+  });
+}
+
+void checkSame(const PerThreadCallsiteTrie &RCT, ArrayRef<Marker> Preorder) {
+  checkSameImpl(RCT.start(), Preorder); // Pops markers while walking the trie.
+  ASSERT_TRUE(Preorder.empty());        // No expected marker may be left over.
+}
+
+TEST(PerThreadCallsiteTrieTest, Insert) {
+  PerThreadCallsiteTrie CT;
+  uptr Chain[]{4, 3, 2, 1};
+  CT.insertStack(StackTrace(Chain, 4));
+  checkSame(CT, ArrayRef<Marker>({0, 1, 2, 3, 4, Marker::term()}));
+  // One more frame below 4: the single chain just gets longer.
+  uptr Deeper[]{5, 4, 3, 2, 1};
+  CT.insertStack(StackTrace(Deeper, 5));
+  checkSame(CT, ArrayRef<Marker>({0, 1, 2, 3, 4, 5, Marker::term()}));
+  // Same prefix up to 3, then a new callsite: node 3 now has 2 children.
+  uptr ForkAt3[]{6, 3, 2, 1};
+  CT.insertStack(StackTrace(ForkAt3, 4));
+  checkSame(CT, ArrayRef<Marker>({0, 1, 2, 3, Marker::split(2), 4, 5,
+                                  Marker::term(), 6, Marker::term()}));
+  // Same prefix up to 2, then a new callsite: node 2 forks as well.
+  uptr ForkAt2[]{7, 2, 1};
+  CT.insertStack(StackTrace(ForkAt2, 3));
+  checkSame(CT, ArrayRef<Marker>({0, 1, 2, Marker::split(2), 7, Marker::term(),
+                                  3, Marker::split(2), 4, 5, Marker::term(), 6,
+                                  Marker::term()}));
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRoots) {
+  // Both stacks pass through functions 100 and 200 (via different callsites
+  // in 200), then diverge into functions 300 and 400: those are the roots.
+  MockCallsiteTree Tree;
+  uptr Via300[]{501, 302, 202, 102};
+  uptr Via400[]{601, 402, 203, 102};
+  Tree.insertStack({Via300, 4});
+  Tree.insertStack({Via400, 4});
+  auto Roots = Tree.determineRoots();
+  EXPECT_EQ(Roots.size(), 2U);
+  EXPECT_TRUE(Roots.contains(300));
+  EXPECT_TRUE(Roots.contains(400));
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) {
+  // A single stack never fans out, so no depth has 2 functions: no roots.
+  MockCallsiteTree Tree;
+  uptr OnlyStack[]{501, 302, 202, 102};
+  Tree.insertStack({OnlyStack, 4});
+
+  auto Roots = Tree.determineRoots();
+  EXPECT_EQ(Roots.size(), 0U);
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) {
+  MockCallsiteTree Tree;
+  uptr Known[]{501, 302, 202, 102};
+  // The mock resolver rounds addresses down to a multiple of 100, so 40 maps
+  // to 0, standing in for a function that could not be resolved. It still
+  // takes part in root detection, under start address 0.
+  uptr Unknown[]{601, 40, 203, 102};
+  Tree.insertStack({Known, 4});
+  Tree.insertStack({Unknown, 4});
+
+  auto Roots = Tree.determineRoots();
+  EXPECT_EQ(Roots.size(), 2U);
+  EXPECT_TRUE(Roots.contains(300));
+  EXPECT_TRUE(Roots.contains(0));
+}



More information about the llvm-commits mailing list