[llvm] 0668bb2 - [ctxprof] Track unhandled call targets (#131417)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 13:51:26 PDT 2025


Author: Mircea Trofin
Date: 2025-03-19T13:51:22-07:00
New Revision: 0668bb28cc98202f1ae252fb0357b2f1043bdbe7

URL: https://github.com/llvm/llvm-project/commit/0668bb28cc98202f1ae252fb0357b2f1043bdbe7
DIFF: https://github.com/llvm/llvm-project/commit/0668bb28cc98202f1ae252fb0357b2f1043bdbe7.diff

LOG: [ctxprof] Track unhandled call targets (#131417)

Collect profiles for functions we encounter when collecting a contextual profile, that are not associated with a call site. This is expected to happen for signal handlers, but it also - problematically - currently happens for mem{memset|copy|move|set}, which are currently inserted after profile instrumentation.

Collecting a "regular" flat profile in these cases would hide the problem - that we loose better profile opportunities.

Added: 
    llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml

Modified: 
    compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
    compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
    compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
    compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
    compiler-rt/test/ctx_profile/TestCases/generate-context.cpp
    llvm/include/llvm/ProfileData/CtxInstrContextNode.h
    llvm/include/llvm/ProfileData/PGOCtxProfReader.h
    llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
    llvm/lib/ProfileData/PGOCtxProfReader.cpp
    llvm/lib/ProfileData/PGOCtxProfWriter.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
    llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index 55962df57fb58..a176662b5cb3d 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -121,6 +121,7 @@ class ProfileWriter {
 public:
   virtual void startContextSection() = 0;
   virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
+                               const ctx_profile::ContextNode *Unhandled,
                                uint64_t TotalRootEntryCount) = 0;
   virtual void endContextSection() = 0;
 

diff  --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index be60d04d6051d..b0e63a8861d86 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -246,22 +246,37 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
 
 ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
                                  uint32_t NumCounters) {
-  // 1) if we are under a root (regardless if this thread is collecting or not a
+
+  // 1) if we are currently collecting a contextual profile, fetch a ContextNode
+  // in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
+  // to (hopefully) offset the penalty of creating these contexts to before
+  // profiling.
+  //
+  // 2) if we are under a root (regardless if this thread is collecting or not a
   // contextual profile for that root), do not collect a flat profile. We want
   // to keep flat profiles only for activations that can't happen under a root,
   // to avoid confusing profiles. We can, for example, combine flattened and
   // flat profiles meaningfully, as we wouldn't double-count anything.
   //
-  // 2) to avoid lengthy startup, don't bother with flat profiles until the
-  // profiling started. We would reset them anyway when profiling starts.
+  // 3) to avoid lengthy startup, don't bother with flat profiles until the
+  // profiling has started. We would reset them anyway when profiling starts.
   // HOWEVER. This does lose profiling for message pumps: those functions are
   // entered once and never exit. They should be assumed to be entered before
   // profiling starts - because profiling should start after the server is up
   // and running (which is equivalent to "message pumps are set up").
-  if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
-    return TheScratchContext;
-  return markAsScratch(
-      onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+  ContextRoot *R = __llvm_ctx_profile_current_context_root;
+  if (!R) {
+    if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
+      return TheScratchContext;
+    else
+      return markAsScratch(
+          onContextEnter(*getFlatProfile(Data, Guid, NumCounters)));
+  }
+  auto [Iter, Ins] = R->Unhandled.insert({Guid, nullptr});
+  if (Ins)
+    Iter->second =
+        getCallsiteSlow(Guid, &R->FirstUnhandledCalleeNode, NumCounters, 0);
+  return markAsScratch(onContextEnter(*Iter->second));
 }
 
 ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
@@ -396,6 +411,8 @@ void __llvm_ctx_profile_start_collection() {
       ++NumMemUnits;
 
     resetContextNode(*Root->FirstNode);
+    if (Root->FirstUnhandledCalleeNode)
+      resetContextNode(*Root->FirstUnhandledCalleeNode);
     __sanitizer::atomic_store_relaxed(&Root->TotalEntries, 0);
   }
   __sanitizer::atomic_store_relaxed(&ProfilingStarted, true);
@@ -416,8 +433,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
       __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
       return false;
     }
-    Writer.writeContextual(*Root->FirstNode, __sanitizer::atomic_load_relaxed(
-                                                 &Root->TotalEntries));
+    Writer.writeContextual(
+        *Root->FirstNode, Root->FirstUnhandledCalleeNode,
+        __sanitizer::atomic_load_relaxed(&Root->TotalEntries));
   }
   Writer.endContextSection();
   Writer.startFlatSection();

diff  --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index cdbfa571247b7..1e151804ea5b1 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -10,6 +10,7 @@
 #define CTX_PROFILE_CTXINSTRPROFILING_H_
 
 #include "CtxInstrContextNode.h"
+#include "sanitizer_common/sanitizer_dense_map.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include <sanitizer/common_interface_defs.h>
 
@@ -83,6 +84,20 @@ struct ContextRoot {
   // Count the number of entries - regardless if we could take the `Taken` mutex
   ::__sanitizer::atomic_uint64_t TotalEntries = {};
 
+  // Profiles for functions we encounter when collecting a contexutal profile,
+  // that are not associated with a callsite. This is expected to happen for
+  // signal handlers, but it also - problematically - currently happens for
+  // call sites generated after profile instrumentation, primarily
+  // mem{memset|copy|move|set}.
+  // `Unhandled` serves 2 purposes:
+  //   1. identifying such cases (like the memops)
+  //   2. collecting a profile for them, which can be at least used as a flat
+  //   profile
+  ::__sanitizer::DenseMap<GUID, ContextNode *> Unhandled;
+  // Keep the unhandled contexts in a list, as we allocate them, as it makes it
+  // simpler to send to the writer when the profile is fetched.
+  ContextNode *FirstUnhandledCalleeNode = nullptr;
+
   // Taken is used to ensure only one thread traverses the contextual graph -
   // either to read it or to write it. On server side, the same entrypoint will
   // be entered by numerous threads, but over time, the profile aggregated by

diff  --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index ccb8f0e87fcdd..83756fed0d6e6 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -240,7 +240,7 @@ TEST_F(ContextTest, Dump) {
     TestProfileWriter(ContextRoot *Root, size_t Entries)
         : Root(Root), Entries(Entries) {}
 
-    void writeContextual(const ContextNode &Node,
+    void writeContextual(const ContextNode &Node, const ContextNode *Unhandled,
                          uint64_t TotalRootEntryCount) override {
       EXPECT_EQ(TotalRootEntryCount, Entries);
       EXPECT_EQ(EnteredSectionCount, 1);

diff  --git a/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp b/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp
index 319f17debe48f..3dc53637a35d8 100644
--- a/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp
+++ b/compiler-rt/test/ctx_profile/TestCases/generate-context.cpp
@@ -5,7 +5,8 @@
 // RUN: cp %llvm_src/include/llvm/ProfileData/CtxInstrContextNode.h %t_include/
 //
 // Compile with ctx instrumentation "on". We treat "theRoot" as callgraph root.
-// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot
+// RUN: %clangxx %s %ctxprofilelib -I%t_include -O2 -o %t.bin -mllvm -profile-context-root=theRoot \
+// RUN:   -mllvm -ctx-prof-skip-callsite-instr=skip_me
 //
 // Run the binary, and observe the profile fetch handler's output.
 // RUN: %t.bin | FileCheck %s
@@ -20,11 +21,14 @@ extern "C" bool __llvm_ctx_profile_fetch(ProfileWriter &);
 
 // avoid name mangling
 extern "C" {
+__attribute__((noinline)) void skip_me() {}
+
 __attribute__((noinline)) void someFunction(int I) {
   if (I % 2)
     printf("check odd\n");
   else
     printf("check even\n");
+  skip_me();
 }
 
 // block inlining because the pre-inliner otherwise will inline this - it's
@@ -36,6 +40,7 @@ __attribute__((noinline)) void theRoot() {
   for (auto I = 0; I < 2; ++I) {
     someFunction(I);
   }
+  skip_me();
 }
 
 __attribute__((noinline)) void flatFct() {
@@ -85,9 +90,13 @@ class TestProfileWriter : public ProfileWriter {
   }
 
   void writeContextual(const ContextNode &RootNode,
+                       const ContextNode *Unhandled,
                        uint64_t EntryCount) override {
     std::cout << "Entering Root " << RootNode.guid()
               << " with total entry count " << EntryCount << std::endl;
+    for (const auto *P = Unhandled; P; P = P->next())
+      std::cout << "Unhandled GUID: " << P->guid() << " entered "
+                << P->entrycount() << " times" << std::endl;
     printProfile(RootNode, "", "");
   }
 
@@ -119,6 +128,8 @@ class TestProfileWriter : public ProfileWriter {
 // branches would be taken once, so the second counter is 1.
 // CHECK-NEXT: Entered Context Section
 // CHECK-NEXT: Entering Root 8657661246551306189 with total entry count 1
+// skip_me is entered 4 times: 3 via `someFunction`, and once from `theRoot`
+// CHECK-NEXT: Unhandled GUID: 17928815489886282963 entered 4 times
 // CHECK-NEXT: Guid: 8657661246551306189
 // CHECK-NEXT: Entries: 1
 // CHECK-NEXT: 2 counters and 3 callsites
@@ -135,6 +146,8 @@ class TestProfileWriter : public ProfileWriter {
 // CHECK-NEXT:   Counter values: 2 1
 // CHECK-NEXT: Exited Context Section
 // CHECK-NEXT: Entered Flat Section
+// This is `skip_me`. Entered 3 times via `someFunction`
+// CHECK-NEXT: Flat: 17928815489886282963 3
 // CHECK-NEXT: Flat: 6759619411192316602 3,1
 // This is flatFct (guid: 14569438697463215220)
 // CHECK-NEXT: Flat: 14569438697463215220 1,2

diff  --git a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
index 55962df57fb58..a176662b5cb3d 100644
--- a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
+++ b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
@@ -121,6 +121,7 @@ class ProfileWriter {
 public:
   virtual void startContextSection() = 0;
   virtual void writeContextual(const ctx_profile::ContextNode &RootNode,
+                               const ctx_profile::ContextNode *Unhandled,
                                uint64_t TotalRootEntryCount) = 0;
   virtual void endContextSection() = 0;
 

diff  --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index 0b871e40f2847..5b59a0995045d 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -74,6 +74,12 @@ class IndexNode {
 };
 } // namespace internal
 
+// Setting initial capacity to 1 because all contexts must have at least 1
+// counter, and then, because all contexts belonging to a function have the same
+// size, there'll be at most one other heap allocation.
+using CtxProfFlatProfile =
+    std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>;
+
 /// A node (context) in the loaded contextual profile, suitable for mutation
 /// during IPO passes. We generally expect a fraction of counters and
 /// callsites to be populated. We continue to model counters as vectors, but
@@ -93,11 +99,16 @@ class PGOCtxProfContext final : public internal::IndexNode {
   GlobalValue::GUID GUID = 0;
   SmallVector<uint64_t, 16> Counters;
   const std::optional<uint64_t> RootEntryCount{};
+  std::optional<CtxProfFlatProfile> Unhandled{};
   CallsiteMapTy Callsites;
 
-  PGOCtxProfContext(GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
-                    std::optional<uint64_t> RootEntryCount = std::nullopt)
-      : GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount) {
+  PGOCtxProfContext(
+      GlobalValue::GUID G, SmallVectorImpl<uint64_t> &&Counters,
+      std::optional<uint64_t> RootEntryCount = std::nullopt,
+      std::optional<CtxProfFlatProfile> &&Unhandled = std::nullopt)
+      : GUID(G), Counters(std::move(Counters)), RootEntryCount(RootEntryCount),
+        Unhandled(std::move(Unhandled)) {
+    assert(RootEntryCount.has_value() == Unhandled.has_value());
   }
 
   Expected<PGOCtxProfContext &>
@@ -121,6 +132,8 @@ class PGOCtxProfContext final : public internal::IndexNode {
   bool isRoot() const { return RootEntryCount.has_value(); }
   uint64_t getTotalRootEntryCount() const { return RootEntryCount.value(); }
 
+  const CtxProfFlatProfile &getUnhandled() const { return Unhandled.value(); }
+
   uint64_t getEntrycount() const {
     assert(!Counters.empty() &&
            "Functions are expected to have at their entry BB instrumented, so "
@@ -170,12 +183,6 @@ class PGOCtxProfContext final : public internal::IndexNode {
   }
 };
 
-// Setting initial capacity to 1 because all contexts must have at least 1
-// counter, and then, because all contexts belonging to a function have the same
-// size, there'll be at most one other heap allocation.
-using CtxProfFlatProfile =
-    std::map<GlobalValue::GUID, SmallVector<uint64_t, 1>>;
-
 using CtxProfContextualProfiles =
     std::map<GlobalValue::GUID, PGOCtxProfContext>;
 struct PGOCtxProfile {
@@ -205,6 +212,7 @@ class PGOCtxProfileReader final {
 
   Error loadContexts(CtxProfContextualProfiles &P);
   Error loadFlatProfiles(CtxProfFlatProfile &P);
+  Error loadFlatProfileList(CtxProfFlatProfile &P);
 
 public:
   PGOCtxProfileReader(StringRef Buffer)

diff  --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
index b2bb8fea10cfe..36ad5622a8544 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
@@ -36,7 +36,8 @@ enum PGOCtxProfileBlockIDs {
   ContextNodeBlockID = ContextRootBlockID + 1,
   FlatProfilesSectionBlockID = ContextNodeBlockID + 1,
   FlatProfileBlockID = FlatProfilesSectionBlockID + 1,
-  LAST_VALID = FlatProfileBlockID
+  UnhandledBlockID = FlatProfileBlockID + 1,
+  LAST_VALID = UnhandledBlockID
 };
 
 /// Write one or more ContextNodes to the provided raw_fd_stream.
@@ -94,6 +95,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
 
   void startContextSection() override;
   void writeContextual(const ctx_profile::ContextNode &RootNode,
+                       const ctx_profile::ContextNode *Unhandled,
                        uint64_t TotalRootEntryCount) override;
   void endContextSection() override;
 
@@ -104,7 +106,7 @@ class PGOCtxProfileWriter final : public ctx_profile::ProfileWriter {
 
   // constants used in writing which a reader may find useful.
   static constexpr unsigned CodeLen = 2;
-  static constexpr uint32_t CurrentVersion = 3;
+  static constexpr uint32_t CurrentVersion = 4;
   static constexpr unsigned VBREncodingBits = 6;
   static constexpr StringRef ContainerMagic = "CTXP";
 };

diff  --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
index f53f2956a7b7e..ec801d43c8588 100644
--- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp
@@ -97,7 +97,7 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
   std::optional<SmallVector<uint64_t, 16>> Counters;
   std::optional<uint32_t> CallsiteIndex;
   std::optional<uint64_t> TotalEntryCount;
-
+  std::optional<CtxProfFlatProfile> Unhandled;
   SmallVector<uint64_t, 1> RecordValues;
 
   const bool ExpectIndex = Kind == PGOCtxProfileBlockIDs::ContextNodeBlockID;
@@ -108,14 +108,24 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
   auto GotAllWeNeed = [&]() {
     return Guid.has_value() && Counters.has_value() &&
            (!ExpectIndex || CallsiteIndex.has_value()) &&
-           (!IsRoot || TotalEntryCount.has_value());
+           (!IsRoot || TotalEntryCount.has_value()) &&
+           (!IsRoot || Unhandled.has_value());
   };
+
   while (!GotAllWeNeed()) {
     RecordValues.clear();
     EXPECT_OR_RET(Entry, advance());
-    if (Entry->Kind != BitstreamEntry::Record)
+    if (Entry->Kind != BitstreamEntry::Record) {
+      if (IsRoot && Entry->Kind == BitstreamEntry::SubBlock &&
+          Entry->ID == PGOCtxProfileBlockIDs::UnhandledBlockID) {
+        RET_ON_ERR(enterBlockWithID(PGOCtxProfileBlockIDs::UnhandledBlockID));
+        Unhandled = CtxProfFlatProfile();
+        RET_ON_ERR(loadFlatProfileList(*Unhandled));
+        continue;
+      }
       return wrongValue(
           "Expected records before encountering more subcontexts");
+    }
     EXPECT_OR_RET(ReadRecord,
                   Cursor.readRecord(bitc::UNABBREV_RECORD, RecordValues));
     switch (*ReadRecord) {
@@ -152,7 +162,8 @@ PGOCtxProfileReader::readProfile(PGOCtxProfileBlockIDs Kind) {
     }
   }
 
-  PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount);
+  PGOCtxProfContext Ret(*Guid, std::move(*Counters), TotalEntryCount,
+                        std::move(Unhandled));
 
   while (canEnterBlockWithID(PGOCtxProfileBlockIDs::ContextNodeBlockID)) {
     EXPECT_OR_RET(SC, readProfile(PGOCtxProfileBlockIDs::ContextNodeBlockID));
@@ -212,9 +223,7 @@ Error PGOCtxProfileReader::loadContexts(CtxProfContextualProfiles &P) {
   return Error::success();
 }
 
-Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
-  RET_ON_ERR(
-      enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID));
+Error PGOCtxProfileReader::loadFlatProfileList(CtxProfFlatProfile &P) {
   while (canEnterBlockWithID(PGOCtxProfileBlockIDs::FlatProfileBlockID)) {
     EXPECT_OR_RET(E, readProfile(PGOCtxProfileBlockIDs::FlatProfileBlockID));
     auto Guid = E->second.guid();
@@ -224,6 +233,12 @@ Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
   return Error::success();
 }
 
+Error PGOCtxProfileReader::loadFlatProfiles(CtxProfFlatProfile &P) {
+  RET_ON_ERR(
+      enterBlockWithID(PGOCtxProfileBlockIDs::FlatProfilesSectionBlockID));
+  return loadFlatProfileList(P);
+}
+
 Expected<PGOCtxProfile> PGOCtxProfileReader::loadProfiles() {
   RET_ON_ERR(readMetadata());
   PGOCtxProfile Ret;
@@ -287,10 +302,13 @@ void toYaml(yaml::Output &Out,
   Out.endSequence();
 }
 
+void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat);
+
 void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
             const SmallVectorImpl<uint64_t> &Counters,
             const PGOCtxProfContext::CallsiteMapTy &Callsites,
-            std::optional<uint64_t> TotalRootEntryCount = std::nullopt) {
+            std::optional<uint64_t> TotalRootEntryCount = std::nullopt,
+            CtxProfFlatProfile Unhandled = {}) {
   yaml::EmptyContext Empty;
   Out.beginMapping();
   void *SaveInfo = nullptr;
@@ -318,6 +336,14 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
     Out.endFlowSequence();
     Out.postflightKey(nullptr);
   }
+
+  if (!Unhandled.empty()) {
+    assert(TotalRootEntryCount.has_value());
+    Out.preflightKey("Unhandled", false, false, UseDefault, SaveInfo);
+    toYaml(Out, Unhandled);
+    Out.postflightKey(nullptr);
+  }
+
   if (!Callsites.empty()) {
     Out.preflightKey("Callsites", true, false, UseDefault, SaveInfo);
     toYaml(Out, Callsites);
@@ -326,10 +352,22 @@ void toYaml(yaml::Output &Out, GlobalValue::GUID Guid,
   Out.endMapping();
 }
 
+void toYaml(yaml::Output &Out, const CtxProfFlatProfile &Flat) {
+  void *SaveInfo = nullptr;
+  Out.beginSequence();
+  size_t ElemID = 0;
+  for (const auto &[Guid, Counters] : Flat) {
+    Out.preflightElement(ElemID++, SaveInfo);
+    toYaml(Out, Guid, Counters, {});
+    Out.postflightElement(nullptr);
+  }
+  Out.endSequence();
+}
+
 void toYaml(yaml::Output &Out, const PGOCtxProfContext &Ctx) {
   if (Ctx.isRoot())
     toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites(),
-           Ctx.getTotalRootEntryCount());
+           Ctx.getTotalRootEntryCount(), Ctx.getUnhandled());
   else
     toYaml(Out, Ctx.guid(), Ctx.counters(), Ctx.callsites());
 }
@@ -348,14 +386,7 @@ void llvm::convertCtxProfToYaml(raw_ostream &OS, const PGOCtxProfile &Profile) {
   }
   if (!Profile.FlatProfiles.empty()) {
     Out.preflightKey("FlatProfiles", false, false, UseDefault, SaveInfo);
-    Out.beginSequence();
-    size_t ElemID = 0;
-    for (const auto &[Guid, Counters] : Profile.FlatProfiles) {
-      Out.preflightElement(ElemID++, SaveInfo);
-      toYaml(Out, Guid, Counters, {});
-      Out.postflightElement(nullptr);
-    }
-    Out.endSequence();
+    toYaml(Out, Profile.FlatProfiles);
     Out.postflightKey(nullptr);
   }
   Out.endMapping();

diff  --git a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
index 910842647591d..0fdf90e5554b0 100644
--- a/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
+++ b/llvm/lib/ProfileData/PGOCtxProfWriter.cpp
@@ -58,6 +58,7 @@ PGOCtxProfileWriter::PGOCtxProfileWriter(
     DescribeRecord(PGOCtxProfileRecords::TotalRootEntryCount,
                    "TotalRootEntryCount");
     DescribeRecord(PGOCtxProfileRecords::Counters, "Counters");
+    DescribeBlock(PGOCtxProfileBlockIDs::UnhandledBlockID, "Unhandled");
     DescribeBlock(PGOCtxProfileBlockIDs::ContextNodeBlockID, "Context");
     DescribeRecord(PGOCtxProfileRecords::Guid, "GUID");
     DescribeRecord(PGOCtxProfileRecords::CallsiteIndex, "CalleeIndex");
@@ -135,6 +136,7 @@ void PGOCtxProfileWriter::endContextSection() { Writer.ExitBlock(); }
 void PGOCtxProfileWriter::endFlatSection() { Writer.ExitBlock(); }
 
 void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
+                                          const ContextNode *Unhandled,
                                           uint64_t TotalRootEntryCount) {
   if (!IncludeEmpty && (!TotalRootEntryCount || (RootNode.counters_size() > 0 &&
                                                  RootNode.entrycount() == 0)))
@@ -143,6 +145,12 @@ void PGOCtxProfileWriter::writeContextual(const ContextNode &RootNode,
   writeGuid(RootNode.guid());
   writeRootEntryCount(TotalRootEntryCount);
   writeCounters({RootNode.counters(), RootNode.counters_size()});
+
+  Writer.EnterSubblock(PGOCtxProfileBlockIDs::UnhandledBlockID, CodeLen);
+  for (const auto *P = Unhandled; P; P = P->next())
+    writeFlat(P->guid(), P->counters(), P->counters_size());
+  Writer.ExitBlock();
+
   writeSubcontexts(RootNode);
   Writer.ExitBlock();
 }
@@ -159,6 +167,9 @@ namespace {
 
 /// Representation of the context node suitable for yaml serialization /
 /// deserialization.
+using SerializableFlatProfileRepresentation =
+    std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
+
 struct SerializableCtxRepresentation {
   ctx_profile::GUID Guid = 0;
   std::vector<uint64_t> Counters;
@@ -167,11 +178,9 @@ struct SerializableCtxRepresentation {
 
 struct SerializableRootRepresentation : public SerializableCtxRepresentation {
   uint64_t TotalRootEntryCount = 0;
+  std::vector<SerializableFlatProfileRepresentation> Unhandled;
 };
 
-using SerializableFlatProfileRepresentation =
-    std::pair<ctx_profile::GUID, std::vector<uint64_t>>;
-
 struct SerializableProfileRepresentation {
   std::vector<SerializableRootRepresentation> Contexts;
   std::vector<SerializableFlatProfileRepresentation> FlatProfiles;
@@ -228,6 +237,7 @@ template <> struct yaml::MappingTraits<SerializableRootRepresentation> {
   static void mapping(yaml::IO &IO, SerializableRootRepresentation &R) {
     yaml::MappingTraits<SerializableCtxRepresentation>::mapping(IO, R);
     IO.mapRequired("TotalRootEntryCount", R.TotalRootEntryCount);
+    IO.mapOptional("Unhandled", R.Unhandled);
   }
 };
 
@@ -265,7 +275,15 @@ Error llvm::createCtxProfFromYAML(StringRef Profile, raw_ostream &Out) {
       if (!TopList)
         return createStringError(
             "Unexpected error converting internal structure to ctx profile");
-      Writer.writeContextual(*TopList, DC.TotalRootEntryCount);
+
+      ctx_profile::ContextNode *FirstUnhandled = nullptr;
+      for (const auto &U : DC.Unhandled) {
+        SerializableCtxRepresentation Unhandled;
+        Unhandled.Guid = U.first;
+        Unhandled.Counters = U.second;
+        FirstUnhandled = createNode(Nodes, Unhandled, FirstUnhandled);
+      }
+      Writer.writeContextual(*TopList, FirstUnhandled, DC.TotalRootEntryCount);
     }
     Writer.endContextSection();
   }

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a8055979acaa2..44f8eb36d0b1e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -55,6 +55,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
@@ -338,6 +339,11 @@ cl::opt<bool> PGOInstrumentColdFunctionOnly(
     "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
     cl::desc("Enable cold function only instrumentation."));
 
+cl::list<std::string> CtxPGOSkipCallsiteInstrument(
+    "ctx-prof-skip-callsite-instr", cl::Hidden,
+    cl::desc("Do not instrument callsites to functions in this list. Intended "
+             "for testing."));
+
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
 
 namespace llvm {
@@ -957,6 +963,10 @@ void FunctionInstrumenter::instrument() {
       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
 
   if (IsCtxProf) {
+    StringSet<> SkipCSInstr;
+    SkipCSInstr.insert(CtxPGOSkipCallsiteInstrument.begin(),
+                       CtxPGOSkipCallsiteInstrument.end());
+
     auto *CSIntrinsic =
         Intrinsic::getOrInsertDeclaration(&M, Intrinsic::instrprof_callsite);
     // We want to count the instrumentable callsites, then instrument them. This
@@ -973,6 +983,9 @@ void FunctionInstrumenter::instrument() {
           if (auto *CS = dyn_cast<CallBase>(&Instr)) {
             if (!InstrProfCallsite::canInstrumentCallsite(*CS))
               continue;
+            if (CS->getCalledFunction() &&
+                SkipCSInstr.contains(CS->getCalledFunction()->getName()))
+              continue;
             Visitor(CS);
           }
     };

diff  --git a/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml
new file mode 100644
index 0000000000000..a5eaf18e7718c
--- /dev/null
+++ b/llvm/test/tools/llvm-ctxprof-util/Inputs/valid-with-unhandled.yaml
@@ -0,0 +1,26 @@
+
+Contexts:
+  - Guid:            1000
+    TotalRootEntryCount: 5
+    Counters:        [ 1, 2, 3 ]
+    Callsites:
+      - [  ]
+      - - Guid:            2000
+          Counters:        [ 4, 5 ]
+        - Guid:            18446744073709551613
+          Counters:        [ 6, 7, 8 ]
+      - - Guid:            3000
+          Counters:        [ 40, 50 ]
+  - Guid:            18446744073709551612
+    TotalRootEntryCount: 45
+    Counters:        [ 5, 9, 10 ]
+    Unhandled:
+      - Guid:            555
+        Counters:        [ 1, 2, 3 ]
+      - Guid:            2000
+        Counters:        [ 1, 2 ]
+FlatProfiles:
+  - Guid:            1234
+    Counters:        [ 5, 6, 7 ]
+  - Guid:            5555
+    Counters:        [ 1 ]

diff  --git a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
index 937bf89532d35..428f1db06cfd1 100644
--- a/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
+++ b/llvm/test/tools/llvm-ctxprof-util/llvm-ctxprof-util.test
@@ -23,6 +23,11 @@
 ; RUN: llvm-ctxprof-util toYAML -input %t/valid-flat-first.bitstream -output %t/valid-flat-first.yaml
 ; RUN: 
diff  %t/valid-flat-first.yaml %S/Inputs/valid.yaml
 
+; A variant with unhandled contexts:
+; RUN: llvm-ctxprof-util fromYAML -input %S/Inputs/valid-with-unhandled.yaml -output %t/valid-with-unhandled.bitstream
+; RUN: llvm-ctxprof-util toYAML -input %t/valid-with-unhandled.bitstream -output %t/valid-with-unhandled.yaml
+; RUN: 
diff  %t/valid-with-unhandled.yaml %S/Inputs/valid-with-unhandled.yaml
+
 ; For the valid case, check against a reference output.
 ; Note that uint64_t are printed as signed values by llvm-bcanalyzer:
 ;  * 18446744073709551613 in yaml is -3 in the output
@@ -33,17 +38,19 @@
 
 ; EMPTY: <BLOCKINFO_BLOCK/>
 ; EMPTY-NEXT: <Metadata NumWords=1 BlockCodeSize=2>
-; EMPTY-NEXT:   <Version op0=3/>
+; EMPTY-NEXT:   <Version op0=4/>
 ; EMPTY-NEXT: </Metadata>
 
 ; VALID:      <BLOCKINFO_BLOCK/>
-; VALID-NEXT: <Metadata NumWords=46 BlockCodeSize=2>
-; VALID-NEXT:   <Version op0=3/>
-; VALID-NEXT:   <Contexts NumWords=30 BlockCodeSize=2>
-; VALID-NEXT:     <Root NumWords=20 BlockCodeSize=2>
+; VALID-NEXT: <Metadata NumWords=53 BlockCodeSize=2>
+; VALID-NEXT:   <Version op0=4/>
+; VALID-NEXT:   <Contexts NumWords=37 BlockCodeSize=2>
+; VALID-NEXT:     <Root NumWords=23 BlockCodeSize=2>
 ; VALID-NEXT:       <GUID op0=1000/>
 ; VALID-NEXT:       <TotalRootEntryCount op0=5/>
 ; VALID-NEXT:       <Counters op0=1 op1=2 op2=3/>
+; VALID-NEXT:       <Unhandled NumWords=1 BlockCodeSize=2>
+; VALID-NEXT:       </Unhandled>
 ; VALID-NEXT:       <Context NumWords=5 BlockCodeSize=2>
 ; VALID-NEXT:         <GUID op0=-3/>
 ; VALID-NEXT:         <CalleeIndex op0=1/>
@@ -60,10 +67,12 @@
 ; VALID-NEXT:         <Counters op0=40 op1=50/>
 ; VALID-NEXT:       </Context>
 ; VALID-NEXT:     </Root>
-; VALID-NEXT:     <Root NumWords=5 BlockCodeSize=2>
+; VALID-NEXT:     <Root NumWords=9 BlockCodeSize=2>
 ; VALID-NEXT:       <GUID op0=-4/>
 ; VALID-NEXT:       <TotalRootEntryCount op0=45/>
 ; VALID-NEXT:       <Counters op0=5 op1=9 op2=10/>
+; VALID-NEXT:       <Unhandled NumWords=1 BlockCodeSize=2>
+; VALID-NEXT:       </Unhandled>
 ; VALID-NEXT:     </Root>
 ; VALID-NEXT:   </Contexts>
 ; VALID-NEXT:   <FlatProfiles NumWords=10 BlockCodeSize=2>

diff  --git a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
index 541667253bfde..426008f1038bb 100644
--- a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
+++ b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp
@@ -103,7 +103,7 @@ TEST_F(PGOCtxProfRWTest, RoundTrip) {
       PGOCtxProfileWriter Writer(Out);
       Writer.startContextSection();
       for (auto &[_, R] : roots())
-        Writer.writeContextual(*R, 1);
+        Writer.writeContextual(*R, nullptr, 1);
       Writer.endContextSection();
     }
   }
@@ -155,7 +155,7 @@ TEST_F(PGOCtxProfRWTest, InvalidCounters) {
     {
       PGOCtxProfileWriter Writer(Out);
       Writer.startContextSection();
-      Writer.writeContextual(*R, 2);
+      Writer.writeContextual(*R, nullptr, 2);
       Writer.endContextSection();
     }
   }
@@ -181,7 +181,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZero) {
     {
       PGOCtxProfileWriter Writer(Out);
       Writer.startContextSection();
-      Writer.writeContextual(*R, 42);
+      Writer.writeContextual(*R, nullptr, 42);
       Writer.endContextSection();
     }
   }
@@ -208,7 +208,7 @@ TEST_F(PGOCtxProfRWTest, CountersAllZeroWithOverride) {
       PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt,
                                  /*IncludeEmpty=*/true);
       Writer.startContextSection();
-      Writer.writeContextual(*R, 8);
+      Writer.writeContextual(*R, nullptr, 8);
       Writer.endContextSection();
     }
   }
@@ -293,8 +293,8 @@ TEST_F(PGOCtxProfRWTest, DuplicateRoots) {
       PGOCtxProfileWriter Writer(Out, /*VersionOverride=*/std::nullopt,
                                  /*IncludeEmpty=*/true);
       Writer.startContextSection();
-      Writer.writeContextual(*createNode(1, 1, 1), 1);
-      Writer.writeContextual(*createNode(1, 1, 1), 1);
+      Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1);
+      Writer.writeContextual(*createNode(1, 1, 1), nullptr, 1);
       Writer.endContextSection();
     }
   }
@@ -322,7 +322,7 @@ TEST_F(PGOCtxProfRWTest, DuplicateTargets) {
       R->subContexts()[0] = L2;
       PGOCtxProfileWriter Writer(Out);
       Writer.startContextSection();
-      Writer.writeContextual(*R, 1);
+      Writer.writeContextual(*R, nullptr, 1);
       Writer.endContextSection();
     }
   }


        


More information about the llvm-commits mailing list