[llvm] [TypeProf][PGO]Skip vtable-based ICP for which type profiles are known to be unrepresentative (PR #110575)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 30 14:30:45 PDT 2024


https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/110575

Performance-critical core libraries can be highly optimized for architecture or micro-architecture features. For instance, the absl CRC library instantiates different template classes for different hardware [1]. In practice, instrumented profiles are likely to be collected on one type of machine and then used to optimize binaries that run on multiple types of hardware.

While this kind of specialization is rare in terms of lines of code, the compiler can do a better job by skipping vtable-based ICP in these cases.
* The per-class `Extend` implementation is arch-specific as well. If an instrumented profile is collected on one arch and applied to another arch where the `Extend` implementation is different, `Extend` might be regarded as an unlikely function in the latter case. The `ABSL_ATTRIBUTE_HOT` annotation alleviates the problem by putting all `Extend` implementations into the hot text section [2].


[1] https://github.com/abseil/abseil-cpp/blob/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L621-L650
[2] https://github.com/abseil/abseil-cpp/blame/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L370C3-L370C21

>From bc4335458b054f25f9e8e1fd1148375d80725d83 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 30 Sep 2024 14:16:12 -0700
Subject: [PATCH] [TypeProf][PGO]Support specification of vtables for which
 type profiles are known to be unrepresentative

---
 .../Instrumentation/IndirectCallPromotion.cpp | 46 +++++++++++++++++--
 .../Transforms/PGOProfile/icp_vtable_cmp.ll   |  1 +
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index fbed593ab3aa74..6031bb8c05fa3f 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,6 +132,11 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
     "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
     cl::desc("The maximum number of vtable for the last candidate."));
 
+static cl::opt<std::string> ICPKnownUnrepresentativeVTables(
+    "icp-known-unrepresentative-vtables", cl::init(""), cl::Hidden,
+    cl::desc("A comma-separated list of mangled vtable names for which instrumented
+    profiles are not representative. For instance, the instantiated class is arch or micro-arch specific, while instrumented profiles are collected on one arch."));
+
 namespace {
 
 // The key is a vtable global variable, and the value is a map.
@@ -316,6 +321,8 @@ class IndirectCallPromoter {
 
   OptimizationRemarkEmitter &ORE;
 
+  const DenseSet<StringRef> &KnownUnrepresentativeBaseTypes;
+
   // A struct that records the direct target and it's call count.
   struct PromotionCandidate {
     Function *const TargetFunction;
@@ -391,10 +398,12 @@ class IndirectCallPromoter {
       Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
       const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
       VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+      DenseSet<StringRef> &KnownUnrepresentativeTypes,
       OptimizationRemarkEmitter &ORE)
       : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
         VirtualCSInfo(VirtualCSInfo),
-        VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
+        VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
+        KnownUnrepresentativeBaseTypes(KnownUnrepresentativeTypes) {}
   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
   IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
 
@@ -851,8 +860,26 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
     LLVM_DEBUG(dbgs() << "\n");
 
     uint64_t CandidateVTableCount = 0;
-    for (auto &[GUID, Count] : VTableGUIDAndCounts)
+    SmallVector<MDNode *, 2> Types;
+    for (auto &[GUID, Count] : VTableGUIDAndCounts) {
       CandidateVTableCount += Count;
+      auto *VTableVar = Symtab->getGlobalVariable(GUID);
+
+      assert(VTableVar &&
+             "VTableVar must exist for GUID in VTableGUIDAndCounts");
+
+      Types.clear();
+      VTableVar->getMetadata(LLVMContext::MD_type, Types);
+
+      for (auto *Type : Types)
+        if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
+          if (KnownUnrepresentativeBaseTypes.contains(TypeId->getString())) {
+            LLVM_DEBUG(dbgs()
+                       << "    vtable profiles are known to be "
+                          "unrepresentative. Bail out vtable comparison.")
+            return false;
+          }
+    }
 
     if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
       LLVM_DEBUG(
@@ -956,9 +983,19 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
   bool Changed = false;
   VirtualCallSiteTypeInfoMap VirtualCSInfo;
 
-  if (EnableVTableProfileUse)
+  DenseSet<StringRef> KnownUnrepresentativeTypeSet;
+
+  if (EnableVTableProfileUse) {
     computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
 
+    SmallVector<StringRef> KnownUnrepresentativeTypes;
+    llvm::SplitString(ICPKnownUnrepresentativeVTables,
+                      KnownUnrepresentativeTypes);
+
+    for (const StringRef Str : KnownUnrepresentativeTypes)
+      KnownUnrepresentativeTypeSet.insert(Str);
+  }
+
   // VTableAddressPointOffsetVal stores the vtable address points. The vtable
   // address point of a given <vtable, address point offset> is static (doesn't
   // change after being computed once).
@@ -977,7 +1014,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
     IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
-                                      VTableAddressPointOffsetVal, ORE);
+                                      VTableAddressPointOffsetVal,
+                                      KnownUnrepresentativeTypeSet, ORE);
     bool FuncChanged = CallPromoter.processFunction(PSI);
     if (ICPDUMPAFTER && FuncChanged) {
       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index b6afce3d7c6d5d..bbae25787a05c6 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -2,6 +2,7 @@
 
 ; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
 ; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-known-unrepresentative-vtables='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"



More information about the llvm-commits mailing list