[compiler-rt] [llvm] [ctxprof] Make ContextRoot an implementation detail (PR #131416)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 22:32:22 PDT 2025


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/131416

>From 94a272af37b771b22e9a2363655e3170dc5f36b7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Thu, 13 Mar 2025 20:46:45 -0700
Subject: [PATCH] [ctxprof] Make ContextRoot an implementation detail

---
 .../lib/ctx_profile/CtxInstrProfiling.cpp     | 25 ++++++++--
 .../lib/ctx_profile/CtxInstrProfiling.h       | 26 +++++-----
 .../tests/CtxInstrProfilingTest.cpp           | 30 +++++------
 .../Instrumentation/PGOCtxProfLowering.cpp    | 50 +++++++------------
 .../PGOProfile/ctx-instrumentation.ll         | 34 ++++++-------
 5 files changed, 88 insertions(+), 77 deletions(-)

diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index 1c2cad1ca506e..6ef7076d93e31 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -336,10 +336,28 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
   AllContextRoots.PushBack(Root);
 }
 
+ContextRoot *FunctionData::getOrAllocateContextRoot() {
+  auto *Root = CtxRoot;
+  if (!Root) {
+    __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
+    Root = CtxRoot;
+    if (!Root) {
+      Root =
+          new (__sanitizer::InternalAlloc(sizeof(ContextRoot))) ContextRoot();
+      CtxRoot = Root;
+    }
+  }
+  assert(Root);
+  return Root;
+}
+
 ContextNode *__llvm_ctx_profile_start_context(
-    ContextRoot *Root, GUID Guid, uint32_t Counters,
+    FunctionData *FData, GUID Guid, uint32_t Counters,
     uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   IsUnderContext = true;
+
+  auto *Root = FData->getOrAllocateContextRoot();
+
   __sanitizer::atomic_fetch_add(&Root->TotalEntries, 1,
                                 __sanitizer::memory_order_relaxed);
 
@@ -356,12 +374,13 @@ ContextNode *__llvm_ctx_profile_start_context(
   return TheScratchContext;
 }
 
-void __llvm_ctx_profile_release_context(ContextRoot *Root)
+void __llvm_ctx_profile_release_context(FunctionData *FData)
     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   IsUnderContext = false;
   if (__llvm_ctx_profile_current_context_root) {
     __llvm_ctx_profile_current_context_root = nullptr;
-    Root->Taken.Unlock();
+    assert(FData->CtxRoot);
+    FData->CtxRoot->Taken.Unlock();
   }
 }
 
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index 72cc60bf523e1..6bb954da950c4 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -84,7 +84,6 @@ struct ContextRoot {
   // Count the number of entries - regardless if we could take the `Taken` mutex
   ::__sanitizer::atomic_uint64_t TotalEntries = {};
 
-  // This is init-ed by the static zero initializer in LLVM.
   // Taken is used to ensure only one thread traverses the contextual graph -
   // either to read it or to write it. On server side, the same entrypoint will
   // be entered by numerous threads, but over time, the profile aggregated by
@@ -109,12 +108,7 @@ struct ContextRoot {
   // or with more concurrent collections (==more memory) and less collection
   // time. Note that concurrent collection does happen for different
   // entrypoints, regardless.
-  ::__sanitizer::StaticSpinMutex Taken;
-
-  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
-  // instrumentation lowering side because it is responsible for allocating and
-  // zero-initializing ContextRoots.
-  static_assert(sizeof(Taken) == 1);
+  ::__sanitizer::SpinMutex Taken;
 };
 
 // This is allocated and zero-initialized by the compiler, the in-place
@@ -139,8 +133,16 @@ struct FunctionData {
   FunctionData() { Mutex.Init(); }
 
   FunctionData *Next = nullptr;
+  ContextRoot *volatile CtxRoot = nullptr;
   ContextNode *volatile FlatCtx = nullptr;
+
+  ContextRoot *getOrAllocateContextRoot();
+
   ::__sanitizer::StaticSpinMutex Mutex;
+  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
+  // instrumentation lowering side because it is responsible for allocating and
+  // zero-initializing FunctionData objects.
+  static_assert(sizeof(Mutex) == 1);
 };
 
 /// This API is exposed for testing. See the APIs below about the contract with
@@ -172,17 +174,17 @@ extern __thread __ctx_profile::ContextRoot
 
 /// called by LLVM in the entry BB of a "entry point" function. The returned
 /// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
-ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
-                                              GUID Guid, uint32_t Counters,
-                                              uint32_t Callsites);
+ContextNode *
+__llvm_ctx_profile_start_context(__ctx_profile::FunctionData *FData, GUID Guid,
+                                 uint32_t Counters, uint32_t Callsites);
 
 /// paired with __llvm_ctx_profile_start_context, and called at the exit of the
 /// entry point function.
-void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
+void __llvm_ctx_profile_release_context(__ctx_profile::FunctionData *FData);
 
 /// called for any other function than entry points, in the entry BB of such
 /// function. Same consideration about LSB of returned value as .._start_context
-ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *Data,
+ContextNode *__llvm_ctx_profile_get_context(__ctx_profile::FunctionData *FData,
                                             void *Callee, GUID Guid,
                                             uint32_t NumCounters,
                                             uint32_t NumCallsites);
diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index 62c7f53acec5f..ccb8f0e87fcdd 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -5,11 +5,11 @@
 using namespace __ctx_profile;
 
 class ContextTest : public ::testing::Test {
-  void SetUp() override { memset(&Root, 0, sizeof(ContextRoot)); }
+  void SetUp() override { Root.getOrAllocateContextRoot(); }
   void TearDown() override { __llvm_ctx_profile_free(); }
 
 public:
-  ContextRoot Root;
+  FunctionData Root;
 };
 
 TEST(ArenaTest, ZeroInit) {
@@ -43,19 +43,20 @@ TEST_F(ContextTest, Basic) {
   __llvm_ctx_profile_start_collection();
   auto *Ctx = __llvm_ctx_profile_start_context(&Root, 1, 10, 4);
   ASSERT_NE(Ctx, nullptr);
-  EXPECT_NE(Root.CurrentMem, nullptr);
-  EXPECT_EQ(Root.FirstMemBlock, Root.CurrentMem);
+  auto &CtxRoot = *Root.CtxRoot;
+  EXPECT_NE(CtxRoot.CurrentMem, nullptr);
+  EXPECT_EQ(CtxRoot.FirstMemBlock, CtxRoot.CurrentMem);
   EXPECT_EQ(Ctx->size(), sizeof(ContextNode) + 10 * sizeof(uint64_t) +
                              4 * sizeof(ContextNode *));
   EXPECT_EQ(Ctx->counters_size(), 10U);
   EXPECT_EQ(Ctx->callsites_size(), 4U);
-  EXPECT_EQ(__llvm_ctx_profile_current_context_root, &Root);
-  Root.Taken.CheckLocked();
-  EXPECT_FALSE(Root.Taken.TryLock());
+  EXPECT_EQ(__llvm_ctx_profile_current_context_root, &CtxRoot);
+  CtxRoot.Taken.CheckLocked();
+  EXPECT_FALSE(CtxRoot.Taken.TryLock());
   __llvm_ctx_profile_release_context(&Root);
   EXPECT_EQ(__llvm_ctx_profile_current_context_root, nullptr);
-  EXPECT_TRUE(Root.Taken.TryLock());
-  Root.Taken.Unlock();
+  EXPECT_TRUE(CtxRoot.Taken.TryLock());
+  CtxRoot.Taken.Unlock();
 }
 
 TEST_F(ContextTest, Callsite) {
@@ -172,7 +173,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
   int FakeCalleeAddress = 0;
   const bool IsScratch = isScratch(Ctx);
   EXPECT_FALSE(IsScratch);
-  const auto *CurrentMem = Root.CurrentMem;
+  auto &CtxRoot = *Root.CtxRoot;
+  const auto *CurrentMem = CtxRoot.CurrentMem;
   __llvm_ctx_profile_expected_callee[0] = &FakeCalleeAddress;
   __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[2];
   FunctionData FData;
@@ -181,8 +183,8 @@ TEST_F(ContextTest, NeedMoreMemory) {
       __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 3, 1 << 20, 1);
   EXPECT_EQ(FData.FlatCtx, nullptr);
   EXPECT_EQ(Ctx->subContexts()[2], Subctx);
-  EXPECT_NE(CurrentMem, Root.CurrentMem);
-  EXPECT_NE(Root.CurrentMem, nullptr);
+  EXPECT_NE(CurrentMem, CtxRoot.CurrentMem);
+  EXPECT_NE(CtxRoot.CurrentMem, nullptr);
 }
 
 TEST_F(ContextTest, ConcurrentRootCollection) {
@@ -277,7 +279,7 @@ TEST_F(ContextTest, Dump) {
     void endFlatSection() override { ++ExitedFlatCount; }
   };
 
-  TestProfileWriter W(&Root, 1);
+  TestProfileWriter W(Root.CtxRoot, 1);
   EXPECT_FALSE(W.State);
   __llvm_ctx_profile_fetch(W);
   EXPECT_TRUE(W.State);
@@ -289,7 +291,7 @@ TEST_F(ContextTest, Dump) {
   (void)Flat;
   EXPECT_NE(FData.FlatCtx, nullptr);
   FData.FlatCtx->counters()[0] = 15U;
-  TestProfileWriter W2(&Root, 0);
+  TestProfileWriter W2(Root.CtxRoot, 0);
   EXPECT_FALSE(W2.State);
   __llvm_ctx_profile_fetch(W2);
   EXPECT_TRUE(W2.State);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 3c30dd61077e5..51532e57b72f5 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -53,10 +53,9 @@ class CtxInstrumentationLowerer final {
   Module &M;
   ModuleAnalysisManager &MAM;
   Type *ContextNodeTy = nullptr;
-  Type *ContextRootTy = nullptr;
   Type *FunctionDataTy = nullptr;
 
-  DenseMap<const Function *, Constant *> ContextRootMap;
+  DenseSet<const Function *> ContextRootSet;
   Function *StartCtx = nullptr;
   Function *GetCtx = nullptr;
   Function *ReleaseCtx = nullptr;
@@ -114,18 +113,10 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
   auto *I32Ty = Type::getInt32Ty(M.getContext());
   auto *I64Ty = Type::getInt64Ty(M.getContext());
 
-  // The ContextRoot type
-  ContextRootTy =
-      StructType::get(M.getContext(), {
-                                          PointerTy,          /*FirstNode*/
-                                          PointerTy,          /*FirstMemBlock*/
-                                          PointerTy,          /*CurrentMem*/
-                                          I64Ty,              /*TotalEntries*/
-                                          SanitizerMutexType, /*Taken*/
-                                      });
   FunctionDataTy =
       StructType::get(M.getContext(), {
                                           PointerTy,          /*Next*/
+                                          PointerTy,          /*CtxRoot*/
                                           PointerTy,          /*FlatCtx*/
                                           SanitizerMutexType, /*Mutex*/
                                       });
@@ -144,10 +135,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
     if (const auto *F = M.getFunction(Fname)) {
       if (F->isDeclaration())
         continue;
-      auto *G = M.getOrInsertGlobal(Fname + "_ctx_root", ContextRootTy);
-      cast<GlobalVariable>(G)->setInitializer(
-          Constant::getNullValue(ContextRootTy));
-      ContextRootMap.insert(std::make_pair(F, G));
+      ContextRootSet.insert(F);
       for (const auto &BB : *F)
         for (const auto &I : BB)
           if (const auto *CB = dyn_cast<CallBase>(&I))
@@ -165,7 +153,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
       M.getOrInsertFunction(
            CompilerRtAPINames::StartCtx,
            FunctionType::get(PointerTy,
-                             {PointerTy, /*ContextRoot*/
+                             {PointerTy, /*FunctionData*/
                               I64Ty, /*Guid*/ I32Ty,
                               /*NumCounters*/ I32Ty /*NumCallsites*/},
                              false))
@@ -184,7 +172,7 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
       M.getOrInsertFunction(CompilerRtAPINames::ReleaseCtx,
                             FunctionType::get(Type::getVoidTy(M.getContext()),
                                               {
-                                                  PointerTy, /*ContextRoot*/
+                                                  PointerTy, /*FunctionData*/
                                               },
                                               false))
           .getCallee());
@@ -224,7 +212,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
   Value *RealContext = nullptr;
 
   StructType *ThisContextType = nullptr;
-  Value *TheRootContext = nullptr;
+  Value *TheRootFuctionData = nullptr;
   Value *ExpectedCalleeTLSAddr = nullptr;
   Value *CallsiteInfoTLSAddr = nullptr;
 
@@ -246,23 +234,23 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
            ArrayType::get(Builder.getPtrTy(), NumCallsites)});
       // Figure out which way we obtain the context object for this function -
       // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
-      // former case, we also set TheRootContext since we need to release it
+      // former case, we also set TheRootFuctionData since we need to release it
       // at the end (plus it can be used to know if we have an entrypoint or a
       // regular function)
-      auto Iter = ContextRootMap.find(&F);
-      if (Iter != ContextRootMap.end()) {
-        TheRootContext = Iter->second;
+      // Make up a compact name; these names end up taking up a lot of space
+      // in the binary.
+      auto *FData = new GlobalVariable(M, FunctionDataTy, false,
+                                       GlobalVariable::InternalLinkage,
+                                       Constant::getNullValue(FunctionDataTy));
+
+      if (ContextRootSet.contains(&F)) {
         Context = Builder.CreateCall(
-            StartCtx, {TheRootContext, Guid, Builder.getInt32(NumCounters),
+            StartCtx, {FData, Guid, Builder.getInt32(NumCounters),
                        Builder.getInt32(NumCallsites)});
+        TheRootFuctionData = FData;
         ORE.emit(
             [&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
       } else {
-        // Make up a compact name, these names end up taking up a lot of space
-        // in the binary.
-        auto *FData = new GlobalVariable(
-            M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
-            Constant::getNullValue(FunctionDataTy));
         Context = Builder.CreateCall(GetCtx, {FData, &F, Guid,
                                               Builder.getInt32(NumCounters),
                                               Builder.getInt32(NumCallsites)});
@@ -347,10 +335,10 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
           break;
         }
         I.eraseFromParent();
-      } else if (TheRootContext && isa<ReturnInst>(I)) {
+      } else if (TheRootFuctionData && isa<ReturnInst>(I)) {
         // Remember to release the context if we are an entrypoint.
         IRBuilder<> Builder(&I);
-        Builder.CreateCall(ReleaseCtx, {TheRootContext});
+        Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
         ContextWasReleased = true;
       }
     }
@@ -359,7 +347,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
   // to disallow this, (so this then stays as an error), another is to detect
   // that and then do a wrapper or disallow the tail call. This only affects
   // instrumentation, when we want to detect the call graph.
-  if (TheRootContext && !ContextWasReleased)
+  if (TheRootFuctionData && !ContextWasReleased)
     F.getContext().emitError(
         "[ctx_prof] An entrypoint was instrumented but it has no `ret` "
         "instructions above which to release the context: " +
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
index a7ac26a52e8cf..ed3cb0824c504 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -9,15 +9,15 @@
 declare void @bar()
 
 ;.
-; LOWERING: @an_entrypoint_ctx_root = global { ptr, ptr, ptr, i64, i8 } zeroinitializer
-; LOWERING: @another_entrypoint_no_callees_ctx_root = global { ptr, ptr, ptr, i64, i8 } zeroinitializer
 ; LOWERING: @__llvm_ctx_profile_callsite = external hidden thread_local global ptr
 ; LOWERING: @__llvm_ctx_profile_expected_callee = external hidden thread_local global ptr
-; LOWERING: @[[GLOB0:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
-; LOWERING: @[[GLOB1:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
-; LOWERING: @[[GLOB2:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
-; LOWERING: @[[GLOB3:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
-; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB0:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB1:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB2:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB3:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB5:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB6:[0-9]+]] = internal global { ptr, ptr, ptr, i8 } zeroinitializer
 ;.
 define void @foo(i32 %a, ptr %fct) {
 ; INSTRUMENT-LABEL: define void @foo(
@@ -97,7 +97,7 @@ define void @an_entrypoint(i32 %a) {
 ;
 ; LOWERING-LABEL: define void @an_entrypoint(
 ; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @[[GLOB1]], i64 4909520559318251808, i32 2, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
 ; LOWERING-NEXT:    [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
@@ -117,10 +117,10 @@ define void @an_entrypoint(i32 %a) {
 ; LOWERING-NEXT:    [[TMP13:%.*]] = getelementptr { { i64, ptr, i32, i32 }, [2 x i64], [1 x ptr] }, ptr [[TMP1]], i32 0, i32 2, i32 0
 ; LOWERING-NEXT:    store volatile ptr [[TMP13]], ptr [[TMP7]], align 8
 ; LOWERING-NEXT:    call void @foo(i32 1, ptr null)
-; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
+; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @[[GLOB1]])
 ; LOWERING-NEXT:    ret void
 ; LOWERING:       no:
-; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @an_entrypoint_ctx_root)
+; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @[[GLOB1]])
 ; LOWERING-NEXT:    ret void
 ;
   %t = icmp eq i32 %a, 0
@@ -147,7 +147,7 @@ define void @another_entrypoint_no_callees(i32 %a) {
 ;
 ; LOWERING-LABEL: define void @another_entrypoint_no_callees(
 ; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @[[GLOB2]], i64 -6371873725078000974, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
 ; LOWERING-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -158,10 +158,10 @@ define void @another_entrypoint_no_callees(i32 %a) {
 ; LOWERING-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
 ; LOWERING-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 1
 ; LOWERING-NEXT:    store i64 [[TMP7]], ptr [[TMP5]], align 4
-; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
+; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @[[GLOB2]])
 ; LOWERING-NEXT:    ret void
 ; LOWERING:       no:
-; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @another_entrypoint_no_callees_ctx_root)
+; LOWERING-NEXT:    call void @__llvm_ctx_profile_release_context(ptr @[[GLOB2]])
 ; LOWERING-NEXT:    ret void
 ;
   %t = icmp eq i32 %a, 0
@@ -181,7 +181,7 @@ define void @simple(i32 %a) {
 ;
 ; LOWERING-LABEL: define void @simple(
 ; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META3:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB1]], ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB3]], ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
 ; LOWERING-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -205,7 +205,7 @@ define i32 @no_callsites(i32 %a) {
 ;
 ; LOWERING-LABEL: define i32 @no_callsites(
 ; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB2]], ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB4]], ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
 ; LOWERING-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
@@ -237,7 +237,7 @@ define void @no_counters() {
 ;
 ; LOWERING-LABEL: define void @no_counters(
 ; LOWERING-SAME: ) !guid [[META5:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB3]], ptr @no_counters, i64 5458232184388660970, i32 1, i32 1)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB5]], ptr @no_counters, i64 5458232184388660970, i32 1, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
 ; LOWERING-NEXT:    [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @__llvm_ctx_profile_expected_callee)
@@ -265,7 +265,7 @@ define void @inlineasm() {
 ;
 ; LOWERING-LABEL: define void @inlineasm(
 ; LOWERING-SAME: ) !guid [[META6:![0-9]+]] {
-; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB4]], ptr @inlineasm, i64 -3771893999295659109, i32 1, i32 0)
+; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @[[GLOB6]], ptr @inlineasm, i64 -3771893999295659109, i32 1, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
 ; LOWERING-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr



More information about the llvm-commits mailing list