[llvm] [TypeProf][PGO]Skip vtable-based ICP for which type profiles are known to be unrepresentative (PR #110575)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 14:31:23 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Mingming Liu (minglotus-6)
<details>
<summary>Changes</summary>
Performance-critical core libraries can be highly optimized for arch or micro-arch features. For instance, the absl crc library specializes different templated classes for different hardware [1]. In a practical setting, it's likely that instrumented profiles are collected on one type of machine and used to optimize binaries that run on multiple types of hardware.
While this kind of specialization is rare in terms of lines of code, the compiler can do a better job by skipping vtable-based ICP when the type profiles are known to be unrepresentative.
* The per-class `Extend` implementation is arch-specific as well. If an instrumented profile is collected on one arch and applied to another arch where the `Extend` implementation is different, `Extend` might be regarded as an unlikely function in the latter case. The `ABSL_ATTRIBUTE_HOT` annotation alleviates the problem by putting all `Extend` implementations into the hot text section [2].
[1] https://github.com/abseil/abseil-cpp/blob/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L621-L650
[2] https://github.com/abseil/abseil-cpp/blame/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L370C3-L370C21
---
Full diff: https://github.com/llvm/llvm-project/pull/110575.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp (+42-4)
- (modified) llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll (+1)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index fbed593ab3aa74..6031bb8c05fa3f 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,6 +132,11 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));
+static cl::opt<std::string> ICPKnownUnrepresentativeVTables(
+ "icp-known-unrepresentative-vtables", cl::init(""), cl::Hidden,
+ cl::desc("A comma-separated list of mangled vtable names for which instrumented
+ profiles are not representative. For instance, the instantiated class is arch or micro-arch specific, while instrumented profiles are collected on one arch."));
+
namespace {
// The key is a vtable global variable, and the value is a map.
@@ -316,6 +321,8 @@ class IndirectCallPromoter {
OptimizationRemarkEmitter &ORE;
+ const DenseSet<StringRef> &KnownUnrepresentativeBaseTypes;
+
// A struct that records the direct target and it's call count.
struct PromotionCandidate {
Function *const TargetFunction;
@@ -391,10 +398,12 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+ DenseSet<StringRef> &KnownUnrepresentativeTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
- VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
+ KnownUnrepresentativeBaseTypes(KnownUnrepresentativeTypes) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -851,8 +860,26 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
LLVM_DEBUG(dbgs() << "\n");
uint64_t CandidateVTableCount = 0;
- for (auto &[GUID, Count] : VTableGUIDAndCounts)
+ SmallVector<MDNode *, 2> Types;
+ for (auto &[GUID, Count] : VTableGUIDAndCounts) {
CandidateVTableCount += Count;
+ auto *VTableVar = Symtab->getGlobalVariable(GUID);
+
+ assert(VTableVar &&
+ "VTableVar must exist for GUID in VTableGUIDAndCounts");
+
+ Types.clear();
+ VTableVar->getMetadata(LLVMContext::MD_type, Types);
+
+ for (auto *Type : Types)
+ if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
+ if (KnownUnrepresentativeBaseTypes.contains(TypeId->getString())) {
+ LLVM_DEBUG(dbgs()
+ << " vtable profiles are known to be "
+ "unrepresentative. Bail out vtable comparison.")
+ return false;
+ }
+ }
if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
LLVM_DEBUG(
@@ -956,9 +983,19 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;
- if (EnableVTableProfileUse)
+ DenseSet<StringRef> KnownUnrepresentativeTypeSet;
+
+ if (EnableVTableProfileUse) {
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+ SmallVector<StringRef> KnownUnrepresentativeTypes;
+ llvm::SplitString(ICPKnownUnrepresentativeVTables,
+ KnownUnrepresentativeTypes);
+
+ for (const StringRef Str : KnownUnrepresentativeTypes)
+ KnownUnrepresentativeTypeSet.insert(Str);
+ }
+
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
@@ -977,7 +1014,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
- VTableAddressPointOffsetVal, ORE);
+ VTableAddressPointOffsetVal,
+ KnownUnrepresentativeTypeSet, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index b6afce3d7c6d5d..bbae25787a05c6 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -2,6 +2,7 @@
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-known-unrepresentative-vtables='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
``````````
</details>
https://github.com/llvm/llvm-project/pull/110575
More information about the llvm-commits
mailing list