[llvm] [llvm][ctx_profile] Add instrumentation (PR #90136)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 11:26:22 PDT 2024


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/90136

>From b0af1e2bf0ed35d748454495c01de77ec0854550 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 24 Apr 2024 07:51:55 -0700
Subject: [PATCH 1/2] [llvm][ctx_profile] Add instrumentation

This adds callsite instrumentation to PGOInstrumentation, *if* contextual
profiling is requested. Requesting contextual profiling also enables
inserting counters in the entry basic block and disables value profiling
(disabling value profiling is a point-in-time change).

This change adds the skeleton of the contextual profiling lowering pass,
just so we can introduce the flag that enables contextual profiling and
the API that checks whether it is enabled. The actual lowering pass will
be introduced in a subsequent patch.

(Tracking Issue: #89287, RFC referenced there)
---
 .../Instrumentation/PGOCtxProfLowering.h      | 24 +++++++++
 .../Transforms/Instrumentation/CMakeLists.txt |  1 +
 .../Instrumentation/PGOCtxProfLowering.cpp    | 19 +++++++
 .../Instrumentation/PGOInstrumentation.cpp    | 53 ++++++++++++++++---
 .../PGOProfile/ctx-instrumentation.ll         | 41 ++++++++++++++
 5 files changed, 130 insertions(+), 8 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
 create mode 100644 llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll

diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
new file mode 100644
index 00000000000000..38afa0c6fd3294
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
@@ -0,0 +1,24 @@
+//===-- PGOCtxProfLowering.h - Contextual PGO Instr. Lowering ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfLoweringPass class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFLOWERING_H
+
+namespace llvm {
+class Type;
+
+class PGOCtxProfLoweringPass {
+public:
+  explicit PGOCtxProfLoweringPass() = default;
+  static bool isContextualIRPGOEnabled();
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 981405329389f4..8d345d394b51a2 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMInstrumentation
   InstrProfiling.cpp
   KCFI.cpp
   LowerAllowCheckPass.cpp
+  PGOCtxProfLowering.cpp
   PGOForceFunctionAttrs.cpp
   PGOInstrumentation.cpp
   PGOMemOPSizeOpt.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
new file mode 100644
index 00000000000000..3cf091c061541b
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -0,0 +1,19 @@
+//===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::list<std::string> ContextRoots("profile-context-root");
+
+bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
+  return !ContextRoots.empty();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index a7b7556685e443..6eaddf5a3f6a83 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
 #include "llvm/Transforms/Instrumentation/CFGMST.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/MisExpect.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -333,6 +334,16 @@ extern cl::opt<bool> EnableVTableValueProfiling;
 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
 } // namespace llvm
 
+bool shouldInstrumentEntryBB() {
+  return PGOInstrumentEntry ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
+bool isValueProfilingDisabled() {
+  return DisableValueProfiling ||
+         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+}
+
 // Return a string describing the branch condition that can be
 // used in static branch probability heuristics:
 static std::string getBranchCondString(Instruction *TI) {
@@ -379,7 +390,7 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
   uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
   if (IsCS)
     ProfileVersion |= VARIANT_MASK_CSIR_PROF;
-  if (PGOInstrumentEntry)
+  if (shouldInstrumentEntryBB())
     ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
   if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
     ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
@@ -861,7 +872,7 @@ static void instrumentOneFunc(
   }
 
   FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
-      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
       PGOBlockCoverage);
 
   auto Name = FuncInfo.FuncNameVar;
@@ -883,6 +894,33 @@ static void instrumentOneFunc(
   unsigned NumCounters =
       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
 
+  if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
+    auto *CSIntrinsic =
+        Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
+    auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
+      for (auto &BB : F)
+        for (auto &Instr : BB)
+          if (auto *CS = dyn_cast<CallBase>(&Instr)) {
+            if ((CS->getCalledFunction() &&
+                 CS->getCalledFunction()->isIntrinsic()) ||
+                dyn_cast<InlineAsm>(CS->getCalledOperand()))
+              continue;
+            Visitor(CS);
+          }
+    };
+    uint32_t TotalNrCallsites = 0;
+    Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
+    uint32_t CallsiteIndex = 0;
+
+    Visit([&](auto *CB) {
+      IRBuilder<> Builder(CB);
+      Builder.CreateCall(CSIntrinsic,
+                         {Name, CFGHash, Builder.getInt32(TotalNrCallsites),
+                          Builder.getInt32(CallsiteIndex++),
+                          CB->getCalledOperand()});
+    });
+  }
+
   uint32_t I = 0;
   if (PGOTemporalInstrumentation) {
     NumCounters += PGOBlockCoverage ? 8 : 1;
@@ -914,7 +952,7 @@ static void instrumentOneFunc(
                                        FuncInfo.FunctionHash);
   assert(I == NumCounters);
 
-  if (DisableValueProfiling)
+  if (isValueProfilingDisabled())
     return;
 
   NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
@@ -1676,7 +1714,7 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
 
 // Traverse all valuesites and annotate the instructions for all value kind.
 void PGOUseFunc::annotateValueSites() {
-  if (DisableValueProfiling)
+  if (isValueProfilingDisabled())
     return;
 
   // Create the PGOFuncName meta data.
@@ -1779,7 +1817,7 @@ static bool InstrumentAllFunctions(
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
   // For the context-sensitve instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
-  if (!IsCS)
+  if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
     createIRLevelProfileFlagVar(M, /*IsCS=*/false);
 
   Triple TT(M.getTargetTriple());
@@ -2015,9 +2053,8 @@ static bool annotateAllFunctions(
 
   // If the profile marked as always instrument the entry BB, do the
   // same. Note this can be overwritten by the internal option in CFGMST.h
-  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
-  if (PGOInstrumentEntry.getNumOccurrences() > 0)
-    InstrumentFuncEntry = PGOInstrumentEntry;
+  bool InstrumentFuncEntry =
+      PGOReader->instrEntryBBEnabled() || shouldInstrumentEntryBB();
   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGOUse(F))
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
new file mode 100644
index 00000000000000..2ad95ab51cc696
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
+; RUN:   -S < %s | FileCheck --check-prefix=INSTRUMENT %s
+
+declare void @bar()
+
+;.
+; INSTRUMENT: @__profn_foo = private constant [3 x i8] c"foo"
+;.
+define void @foo(i32 %a, ptr %fct) {
+; INSTRUMENT-LABEL: define void @foo(
+; INSTRUMENT-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
+; INSTRUMENT-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; INSTRUMENT-NEXT:    [[T:%.*]] = icmp eq i32 [[A]], 0
+; INSTRUMENT-NEXT:    br i1 [[T]], label [[YES:%.*]], label [[NO:%.*]]
+; INSTRUMENT:       yes:
+; INSTRUMENT-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; INSTRUMENT-NEXT:    call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
+; INSTRUMENT-NEXT:    call void [[FCT]](i32 [[A]])
+; INSTRUMENT-NEXT:    br label [[EXIT:%.*]]
+; INSTRUMENT:       no:
+; INSTRUMENT-NEXT:    call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
+; INSTRUMENT-NEXT:    call void @bar()
+; INSTRUMENT-NEXT:    br label [[EXIT]]
+; INSTRUMENT:       exit:
+; INSTRUMENT-NEXT:    ret void
+;
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}
+;.
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.

>From bef40ccd05c8b1508ab4c2a72e7062e3b05a50ed Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 30 Apr 2024 12:57:27 -0700
Subject: [PATCH 2/2] feedback

---
 .../Instrumentation/PGOCtxProfLowering.cpp    |  7 +++++-
 .../Instrumentation/PGOInstrumentation.cpp    | 23 ++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 3cf091c061541b..9d6dd5ccb38b8d 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -12,7 +12,12 @@
 
 using namespace llvm;
 
-static cl::list<std::string> ContextRoots("profile-context-root");
+static cl::list<std::string> ContextRoots(
+    "profile-context-root", cl::Hidden,
+    cl::desc(
+        "A function name, assumed to be global, which will be treated as the "
+        "root of an interesting graph, which will be profiled independently "
+        "from other similar graphs."));
 
 bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
   return !ContextRoots.empty();
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 6eaddf5a3f6a83..b333b1582e802c 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -339,6 +339,10 @@ bool shouldInstrumentEntryBB() {
          PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
 }
 
+// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
+// profiling implicitly captures indirect call cases, but not other values.
+// Supporting other values is relatively straight-forward - just another counter
+// range within the context.
 bool isValueProfilingDisabled() {
   return DisableValueProfiling ||
          PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
@@ -897,6 +901,14 @@ static void instrumentOneFunc(
   if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
     auto *CSIntrinsic =
         Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
+    // We want to count the instrumentable callsites, then instrument them. This
+    // is because the llvm.instrprof.callsite intrinsic has an argument (like
+    // the other instrprof intrinsics) capturing the total number of
+    // instrumented objects (counters, or callsites, in this case). In this
+    // case, we want that value so we can readily pass it to the compiler-rt
+    // APIs that may have to allocate memory based on the nr of callsites.
+    // The traversal logic is the same for both counting and instrumentation,
+    // just needs to be done in succession.
     auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
       for (auto &BB : F)
         for (auto &Instr : BB)
@@ -908,10 +920,12 @@ static void instrumentOneFunc(
             Visitor(CS);
           }
     };
+    // First, count callsites.
     uint32_t TotalNrCallsites = 0;
     Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
-    uint32_t CallsiteIndex = 0;
 
+    // Now instrument.
+    uint32_t CallsiteIndex = 0;
     Visit([&](auto *CB) {
       IRBuilder<> Builder(CB);
       Builder.CreateCall(CSIntrinsic,
@@ -2053,8 +2067,11 @@ static bool annotateAllFunctions(
 
   // If the profile marked as always instrument the entry BB, do the
   // same. Note this can be overwritten by the internal option in CFGMST.h
-  bool InstrumentFuncEntry =
-      PGOReader->instrEntryBBEnabled() || shouldInstrumentEntryBB();
+  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
+  if (PGOInstrumentEntry.getNumOccurrences() > 0)
+    InstrumentFuncEntry = PGOInstrumentEntry;
+  InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+
   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGOUse(F))



More information about the llvm-commits mailing list