[compiler-rt] 63bb007 - [ctxprof] Auto root detection: trie for stack samples (#133106)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 20:08:09 PDT 2025


Author: Mircea Trofin
Date: 2025-03-28T20:08:05-07:00
New Revision: 63bb0078f824143f580225ad92464b21186f646e

URL: https://github.com/llvm/llvm-project/commit/63bb0078f824143f580225ad92464b21186f646e
DIFF: https://github.com/llvm/llvm-project/commit/63bb0078f824143f580225ad92464b21186f646e.diff

LOG: [ctxprof] Auto root detection: trie for stack samples (#133106)

An initial patch for supporting automated root detection. The auto-detector is introduced subsequently, but this patch introduces a data structure for capturing sampled stacks, per thread, in a trie, and inferring from such samples which functions are reasonable roots.

Added: 
    compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
    compiler-rt/lib/ctx_profile/RootAutoDetector.h
    compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp

Modified: 
    compiler-rt/lib/ctx_profile/CMakeLists.txt
    compiler-rt/lib/ctx_profile/tests/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/ctx_profile/CMakeLists.txt b/compiler-rt/lib/ctx_profile/CMakeLists.txt
index ce491fc7e8bf0..bb606449c61b1 100644
--- a/compiler-rt/lib/ctx_profile/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/CMakeLists.txt
@@ -2,11 +2,13 @@ add_compiler_rt_component(ctx_profile)
 
 set(CTX_PROFILE_SOURCES
   CtxInstrProfiling.cpp
+  RootAutoDetector.cpp
   )
 
 set(CTX_PROFILE_HEADERS
   CtxInstrContextNode.h
   CtxInstrProfiling.h
+  RootAutoDetector.h
   )
 
 include_directories(..)

diff  --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
new file mode 100644
index 0000000000000..483c55c25eefe
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
@@ -0,0 +1,90 @@
+//===- RootAutoDetector.cpp - detect contextual profiling roots -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RootAutoDetector.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h" // IWYU pragma: keep (DenseMap)
+#include <assert.h>
+#include <dlfcn.h>
+#include <pthread.h>
+
+using namespace __ctx_profile;
+template <typename T> using Set = DenseMap<T, bool>;
+
+uptr PerThreadCallsiteTrie::getFctStartAddr(uptr CallsiteAddress) const {
+  // Symbol lookup via dladdr; this requires --linkopt=-Wl,--export-dynamic.
+  Dl_info Info;
+  if (dladdr(reinterpret_cast<const void *>(CallsiteAddress), &Info) != 0)
+    return reinterpret_cast<uptr>(Info.dli_saddr); // Symbol start address.
+  return 0; // dladdr failed: report the function as unknown (0).
+}
+
+void PerThreadCallsiteTrie::insertStack(const StackTrace &ST) {
+  ++TheTrie.Count; // The root node counts every inserted stack.
+  auto *Current = &TheTrie;
+  // The stack is backwards (first callsite at the top): walk it in reverse.
+  for (int I = ST.size - 1; I >= 0; --I) {
+    uptr ChildAddr = ST.trace[I];
+    auto [Iter, _] = Current->Children.insert({ChildAddr, Trie(ChildAddr)});
+    ++Iter->second.Count; // One more stack went through this callsite.
+    Current = &Iter->second; // Descend for the next frame.
+  }
+}
+
+DenseMap<uptr, uint64_t> PerThreadCallsiteTrie::determineRoots() const {
+  // Assuming a message pump design, roots are those functions called by the
+  // message pump. The message pump is an infinite loop (for all practical
+  // considerations) fetching data from a queue. The root functions return -
+  // otherwise the message pump doesn't work. This function detects roots as the
+  // first place in the trie (starting from the root) where a function calls 2
+  // or more functions.
+  //
+  // We start with a callsite trie - the nodes are callsites. Different child
+  // nodes may actually correspond to the same function.
+  //
+  // For example: using function(callsite)
+  // f1(csf1_1) -> f2(csf2_1) -> f3
+  //            -> f2(csf2_2) -> f4
+  //
+  // would be represented in our trie as:
+  // csf1_1 -> csf2_1 -> f3
+  //        -> csf2_2 -> f4
+  //
+  // While we can assert the control flow returns to f2, we don't know if it
+  // ever returns to f1. f2 could be the message pump.
+  //
+  // We need to convert our callsite tree into a function tree. We can also,
+  // more economically, just see how many distinct functions there are at a
+  // certain depth. When that count is greater than 1, we have reached the
+  // potential roots, and everything above should be considered a non-root.
+  DenseMap<uptr, uint64_t> Result;
+  Set<const Trie *> Worklist; // Trie nodes at the depth being examined.
+  Worklist.insert({&TheTrie, {}});
+
+  while (!Worklist.empty()) {
+    Set<const Trie *> NextWorklist; // Children of the current level.
+    DenseMap<uptr, uint64_t> Candidates; // Function start -> summed counts.
+    Worklist.forEach([&](const auto &KVP) {
+      auto [Node, _] = KVP;
+      auto SA = getFctStartAddr(Node->CallsiteAddress);
+      Candidates[SA] += Node->Count; // Merge callsites of the same function.
+      Node->Children.forEach([&](auto &ChildKVP) {
+        NextWorklist.insert({&ChildKVP.second, true});
+        return true;
+      });
+      return true;
+    });
+    if (Candidates.size() > 1) { // First level with 2+ distinct functions.
+      Result.swap(Candidates);
+      break;
+    }
+    Worklist.swap(NextWorklist);
+  }
+  return Result; // Empty if no level had more than one function.
+}

diff  --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.h b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
new file mode 100644
index 0000000000000..85dd5ef1c32d9
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.h
@@ -0,0 +1,57 @@
+/*===- RootAutoDetector.h - auto-detect roots for ctxprof -----------------===*\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+
+#ifndef CTX_PROFILE_ROOTAUTODETECTOR_H_
+#define CTX_PROFILE_ROOTAUTODETECTOR_H_
+
+#include "sanitizer_common/sanitizer_dense_map.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include <pthread.h>
+#include <sanitizer/common_interface_defs.h>
+
+using namespace __asan;
+using namespace __sanitizer;
+
+namespace __ctx_profile {
+
+/// Capture all the stack traces observed for a specific thread. The "for a
+/// specific thread" part is not enforced, but assumed in determineRoots.
+class PerThreadCallsiteTrie {
+protected:
+  /// A trie. A node is the address of a callsite in a function activation. A
+  /// child is a callsite in the activation made from the callsite
+  /// corresponding to the parent.
+  struct Trie final {
+    const uptr CallsiteAddress;
+    uint64_t Count = 0; // Inserted stacks that went through this node.
+    DenseMap<uptr, Trie> Children; // Keyed by the child's callsite address.
+
+    Trie(uptr CallsiteAddress = 0) : CallsiteAddress(CallsiteAddress) {}
+  };
+  Trie TheTrie; // The root; it keeps the default callsite address, 0.
+
+  /// Return the runtime start address of the function that contains the call at
+  /// the runtime address CallsiteAddress. May be overridden for easy testing.
+  virtual uptr getFctStartAddr(uptr CallsiteAddress) const;
+
+public:
+  PerThreadCallsiteTrie(const PerThreadCallsiteTrie &) = delete;
+  PerThreadCallsiteTrie(PerThreadCallsiteTrie &&) = default;
+  PerThreadCallsiteTrie() = default;
+
+  virtual ~PerThreadCallsiteTrie() = default;
+
+  void insertStack(const StackTrace &ST);
+
+  /// Return the runtime address of root functions, as determined for this
+  /// thread, together with the number of samples that included them.
+  DenseMap<uptr, uint64_t> determineRoots() const;
+};
+} // namespace __ctx_profile
+#endif

diff  --git a/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt b/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
index 012fd7aff7862..0954d5cd34487 100644
--- a/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
+++ b/compiler-rt/lib/ctx_profile/tests/CMakeLists.txt
@@ -22,10 +22,12 @@ append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros CTX_PR
 file(GLOB CTX_PROFILE_HEADERS ../*.h)
 
 set(CTX_PROFILE_SOURCES
-  ../CtxInstrProfiling.cpp)
+  ../CtxInstrProfiling.cpp
+  ../RootAutoDetector.cpp)
 
 set(CTX_PROFILE_UNITTESTS
   CtxInstrProfilingTest.cpp
+  RootAutoDetectorTest.cpp
   driver.cpp)
 
 include_directories(../../../include)

diff  --git a/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp b/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp
new file mode 100644
index 0000000000000..8fd5bf004faf7
--- /dev/null
+++ b/compiler-rt/lib/ctx_profile/tests/RootAutoDetectorTest.cpp
@@ -0,0 +1,155 @@
+#include "../RootAutoDetector.h"
+#include "sanitizer_common/sanitizer_array_ref.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace __ctx_profile;
+using ::testing::IsEmpty;
+using ::testing::Not;
+using ::testing::SizeIs;
+
+// Utility for describing a preorder traversal. By default it captures the
+// address and count at a callsite node. Implicitly nodes are expected to have 1
+// child. If they have none, we place a Marker::term and if they have more than
+// one, we place a Marker::split(nr_of_children). For example, using a list
+// notation, and letters to denote a pair of address and count:
+// (A (B C) (D (E F))) is a list of markers: A, split(2), B, term, C,
+// term, D, split(2), E, term, F, term
+class Marker {
+  enum class Kind { End, Value, Split };
+  const uptr Value; // Callsite address, or child count for a Split marker.
+  const uptr Count; // Sample count; 0 for Split and End markers.
+  const Kind K;
+  Marker(uptr V, uptr C, Kind S) : Value(V), Count(C), K(S) {}
+
+public:
+  Marker(uptr V, uptr C) : Marker(V, C, Kind::Value) {}
+
+  static Marker split(uptr V) { return Marker(V, 0, Kind::Split); }
+  static Marker term() { return Marker(0, 0, Kind::End); }
+
+  bool isSplit() const { return K == Kind::Split; }
+  bool isTerm() const { return K == Kind::End; }
+  bool isVal() const { return K == Kind::Value; }
+
+  bool operator==(const Marker &M) const {
+    return Value == M.Value && Count == M.Count && K == M.K;
+  }
+};
+
+class MockCallsiteTrie final : public PerThreadCallsiteTrie {
+  // Round the address down to a multiple of 100.
+  uptr getFctStartAddr(uptr CallsiteAddress) const override {
+    return (CallsiteAddress / 100) * 100;
+  }
+
+  static void popAndCheck(ArrayRef<Marker> &Preorder, Marker M) {
+    ASSERT_THAT(Preorder, Not(IsEmpty()));
+    ASSERT_EQ(Preorder[0], M);
+    Preorder = Preorder.drop_front(); // Consume the marker just checked.
+  }
+
+  static void checkSameImpl(const Trie &T, ArrayRef<Marker> &Preorder) {
+    popAndCheck(Preorder, {T.CallsiteAddress, T.Count}); // The node itself.
+
+    if (T.Children.empty()) {
+      popAndCheck(Preorder, Marker::term()); // Leaves are followed by a term.
+      return;
+    }
+
+    if (T.Children.size() > 1) // A single child needs no marker.
+      popAndCheck(Preorder, Marker::split(T.Children.size()));
+
+    T.Children.forEach([&](const auto &KVP) {
+      checkSameImpl(KVP.second, Preorder);
+      return true;
+    });
+  }
+
+public:
+  void checkSame(ArrayRef<Marker> Preorder) const {
+    checkSameImpl(TheTrie, Preorder);
+    ASSERT_THAT(Preorder, IsEmpty()); // Every expected marker was consumed.
+  }
+};
+
+TEST(PerThreadCallsiteTrieTest, Insert) {
+  MockCallsiteTrie R;
+  uptr Stack1[]{4, 3, 2, 1}; // Inserted in reverse: 1 -> 2 -> 3 -> 4.
+  R.insertStack(StackTrace(Stack1, 4));
+  R.checkSame(ArrayRef<Marker>(
+      {{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}, Marker::term()}));
+
+  uptr Stack2[]{5, 4, 3, 2, 1}; // Extends the existing path with a leaf, 5.
+  R.insertStack(StackTrace(Stack2, 5));
+  R.checkSame(ArrayRef<Marker>(
+      {{0, 2}, {1, 2}, {2, 2}, {3, 2}, {4, 2}, {5, 1}, Marker::term()}));
+
+  uptr Stack3[]{6, 3, 2, 1}; // Branches off below callsite 3.
+  R.insertStack(StackTrace(Stack3, 4));
+  R.checkSame(ArrayRef<Marker>({{0, 3},
+                                {1, 3},
+                                {2, 3},
+                                {3, 3},
+                                Marker::split(2),
+                                {4, 2},
+                                {5, 1},
+                                Marker::term(),
+                                {6, 1},
+                                Marker::term()}));
+  uptr Stack4[]{7, 2, 1}; // Branches off below callsite 2.
+  R.insertStack(StackTrace(Stack4, 3));
+  R.checkSame(ArrayRef<Marker>({{0, 4},
+                                {1, 4},
+                                {2, 4},
+                                Marker::split(2),
+                                {7, 1},
+                                Marker::term(),
+                                {3, 3},
+                                Marker::split(2),
+                                {4, 2},
+                                {5, 1},
+                                Marker::term(),
+                                {6, 1},
+                                Marker::term()}));
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRoots) {
+  MockCallsiteTrie T;
+
+  uptr Stack1[]{501, 302, 202, 102}; // Mock functions 100, 200, 300, 500.
+  uptr Stack2[]{601, 402, 203, 102}; // Mock functions 100, 200, 400, 600.
+  T.insertStack({Stack1, 4});
+  T.insertStack({Stack2, 4});
+
+  auto R = T.determineRoots(); // The stacks diverge below function 200.
+  EXPECT_THAT(R, SizeIs(2U));
+  EXPECT_TRUE(R.contains(300));
+  EXPECT_TRUE(R.contains(400));
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRootsNoBranches) {
+  MockCallsiteTrie T;
+
+  uptr Stack1[]{501, 302, 202, 102};
+  T.insertStack({Stack1, 4});
+
+  auto R = T.determineRoots(); // One path only: no level has 2+ functions.
+  EXPECT_THAT(R, IsEmpty());
+}
+
+TEST(PerThreadCallsiteTrieTest, DetectRootsUnknownFct) {
+  MockCallsiteTrie T;
+
+  uptr Stack1[]{501, 302, 202, 102};
+  // The MockCallsiteTrie address resolver rounds addresses down to a multiple
+  // of 100, so 40 will be mapped to 0.
+  uptr Stack2[]{601, 40, 203, 102};
+  T.insertStack({Stack1, 4});
+  T.insertStack({Stack2, 4});
+
+  auto R = T.determineRoots();
+  ASSERT_THAT(R, SizeIs(2U));
+  EXPECT_TRUE(R.contains(300));
+  EXPECT_TRUE(R.contains(0));
+}


        


More information about the llvm-commits mailing list