[llvm] [TypeProf][PGO]Skip vtable-based ICP for which type profiles are known to be unrepresentative (PR #110575)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 18:02:24 PDT 2024
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/110575
>From bc4335458b054f25f9e8e1fd1148375d80725d83 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 30 Sep 2024 14:16:12 -0700
Subject: [PATCH 1/2] [TypeProf][PGO]Support specification of vtables for which
type profiles are known to be unrepresentative
---
.../Instrumentation/IndirectCallPromotion.cpp | 46 +++++++++++++++++--
.../Transforms/PGOProfile/icp_vtable_cmp.ll | 1 +
2 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index fbed593ab3aa74..6031bb8c05fa3f 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,6 +132,11 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));
+static cl::opt<std::string> ICPKnownUnrepresentativeVTables(
+ "icp-known-unrepresentative-vtables", cl::init(""), cl::Hidden,
+ cl::desc("A comma-separated list of mangled vtable names for which instrumented
+ profiles are not representative. For instance, the instantiated class is arch or micro-arch specific, while instrumented profiles are collected on one arch."));
+
namespace {
// The key is a vtable global variable, and the value is a map.
@@ -316,6 +321,8 @@ class IndirectCallPromoter {
OptimizationRemarkEmitter &ORE;
+ const DenseSet<StringRef> &KnownUnrepresentativeBaseTypes;
+
// A struct that records the direct target and it's call count.
struct PromotionCandidate {
Function *const TargetFunction;
@@ -391,10 +398,12 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+ DenseSet<StringRef> &KnownUnrepresentativeTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
- VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
+ KnownUnrepresentativeBaseTypes(KnownUnrepresentativeTypes) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -851,8 +860,26 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
LLVM_DEBUG(dbgs() << "\n");
uint64_t CandidateVTableCount = 0;
- for (auto &[GUID, Count] : VTableGUIDAndCounts)
+ SmallVector<MDNode *, 2> Types;
+ for (auto &[GUID, Count] : VTableGUIDAndCounts) {
CandidateVTableCount += Count;
+ auto *VTableVar = Symtab->getGlobalVariable(GUID);
+
+ assert(VTableVar &&
+ "VTableVar must exist for GUID in VTableGUIDAndCounts");
+
+ Types.clear();
+ VTableVar->getMetadata(LLVMContext::MD_type, Types);
+
+ for (auto *Type : Types)
+ if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
+ if (KnownUnrepresentativeBaseTypes.contains(TypeId->getString())) {
+ LLVM_DEBUG(dbgs()
+ << " vtable profiles are known to be "
+ "unrepresentative. Bail out vtable comparison.")
+ return false;
+ }
+ }
if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
LLVM_DEBUG(
@@ -956,9 +983,19 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;
- if (EnableVTableProfileUse)
+ DenseSet<StringRef> KnownUnrepresentativeTypeSet;
+
+ if (EnableVTableProfileUse) {
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+ SmallVector<StringRef> KnownUnrepresentativeTypes;
+ llvm::SplitString(ICPKnownUnrepresentativeVTables,
+ KnownUnrepresentativeTypes);
+
+ for (const StringRef Str : KnownUnrepresentativeTypes)
+ KnownUnrepresentativeTypeSet.insert(Str);
+ }
+
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
@@ -977,7 +1014,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
- VTableAddressPointOffsetVal, ORE);
+ VTableAddressPointOffsetVal,
+ KnownUnrepresentativeTypeSet, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index b6afce3d7c6d5d..bbae25787a05c6 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -2,6 +2,7 @@
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-known-unrepresentative-vtables='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
>From 26956410ea50e3b5e82fb6a50dd010495d92d104 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 30 Sep 2024 17:54:05 -0700
Subject: [PATCH 2/2] resolve review feedback
---
llvm/include/llvm/Support/CommandLine.h | 20 ++++++++++
llvm/lib/Support/CommandLine.cpp | 20 ++++++++++
.../Instrumentation/IndirectCallPromotion.cpp | 37 +++++++------------
.../Transforms/PGOProfile/icp_vtable_cmp.ll | 2 +-
4 files changed, 54 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 5d60bb64bbb205..7454db1973ad4f 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -1208,6 +1208,26 @@ template <> class parser<char> : public basic_parser<char> {
void anchor() override;
};
+//--------------------------------------------------
+
+extern template class basic_parser<DenseSet<StringRef>>;
+
+template <>
+class parser<DenseSet<StringRef>> : public basic_parser<DenseSet<StringRef>> {
+public:
+ parser(Option &O) : basic_parser(O) {}
+
+ // Return true on error.
+ bool parse(Option &, StringRef, StringRef Arg, DenseSet<StringRef> &Val);
+
+ StringRef getValueName() const override { return "DenseSet<StringRef>"; }
+
+ void printOptionDiff(const Option &O, const DenseSet<StringRef> &V,
+ OptVal Default, size_t GlobalWidth) const;
+
+ void anchor() override;
+};
+
//--------------------------------------------------
// This collection of wrappers is the intermediary between class opt and class
// parser to handle all the template nastiness.
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e34a770b1b53e5..7f22c82f6fa9d2 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -66,6 +66,7 @@ template class basic_parser<double>;
template class basic_parser<float>;
template class basic_parser<std::string>;
template class basic_parser<char>;
+template class basic_parser<DenseSet<StringRef>>;
template class opt<unsigned>;
template class opt<int>;
@@ -93,6 +94,7 @@ void parser<double>::anchor() {}
void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
void parser<char>::anchor() {}
+void parser<DenseSet<StringRef>>::anchor() {}
//===----------------------------------------------------------------------===//
@@ -2060,6 +2062,24 @@ bool parser<float>::parse(Option &O, StringRef ArgName, StringRef Arg,
return false;
}
+// parser<DenseSet<StringRef> implementation
+//
+void parser<DenseSet<StringRef>>::printOptionDiff(
+ const Option &O, const DenseSet<StringRef> &V,
+ OptionValue<DenseSet<StringRef>> D, size_t GlobalWidth) const {}
+
+bool parser<DenseSet<StringRef>>::parse(Option &O, StringRef ArgName,
+ StringRef Arg,
+ DenseSet<StringRef> &Val) {
+ SmallVector<StringRef> StrRefs;
+ llvm::SplitString(Arg, StrRefs, ",");
+ for (const StringRef StrRef : StrRefs)
+ if (!Val.insert(StrRef).second)
+ return O.error(Arg + " has duplicated strings!");
+
+ return false;
+}
+
// generic_parser_base implementation
//
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 6031bb8c05fa3f..8a8baff80eba0e 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,10 +132,13 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));
-static cl::opt<std::string> ICPKnownUnrepresentativeVTables(
- "icp-known-unrepresentative-vtables", cl::init(""), cl::Hidden,
- cl::desc("A comma-separated list of mangled vtable names for which instrumented
- profiles are not representative. For instance, the instantiated class is arch or micro-arch specific, while instrumented profiles are collected on one arch."));
+static cl::opt<DenseSet<StringRef>> ICPIgnoredBaseTypes(
+ "icp-ignored-base-types", cl::Hidden, cl::init(DenseSet<StringRef>()),
+ cl::desc("A comma-separated list of mangled vtable names. Classes "
+ "specified by the vtables and their derived ones will not be "
+ "vtable-ICP'ed. Useful when the profiled types and actual types "
+ "in the optimized binary could be different due to profiling "
+ "limitations."));
namespace {
@@ -321,8 +324,6 @@ class IndirectCallPromoter {
OptimizationRemarkEmitter &ORE;
- const DenseSet<StringRef> &KnownUnrepresentativeBaseTypes;
-
// A struct that records the direct target and it's call count.
struct PromotionCandidate {
Function *const TargetFunction;
@@ -398,12 +399,10 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
- DenseSet<StringRef> &KnownUnrepresentativeTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
- VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
- KnownUnrepresentativeBaseTypes(KnownUnrepresentativeTypes) {}
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -871,12 +870,13 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
Types.clear();
VTableVar->getMetadata(LLVMContext::MD_type, Types);
+ const DenseSet<StringRef> &VTableSet = ICPIgnoredBaseTypes.getValue();
for (auto *Type : Types)
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
- if (KnownUnrepresentativeBaseTypes.contains(TypeId->getString())) {
+ if (VTableSet.contains(TypeId->getString().str())) {
LLVM_DEBUG(dbgs()
<< " vtable profiles are known to be "
- "unrepresentative. Bail out vtable comparison.")
+ "unrepresentative. Bail out vtable comparison.");
return false;
}
}
@@ -983,19 +983,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;
- DenseSet<StringRef> KnownUnrepresentativeTypeSet;
-
- if (EnableVTableProfileUse) {
+ if (EnableVTableProfileUse)
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
- SmallVector<StringRef> KnownUnrepresentativeTypes;
- llvm::SplitString(ICPKnownUnrepresentativeVTables,
- KnownUnrepresentativeTypes);
-
- for (const StringRef Str : KnownUnrepresentativeTypes)
- KnownUnrepresentativeTypeSet.insert(Str);
- }
-
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
@@ -1014,8 +1004,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
- VTableAddressPointOffsetVal,
- KnownUnrepresentativeTypeSet, ORE);
+ VTableAddressPointOffsetVal, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index bbae25787a05c6..f3a43a830fa125 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -2,7 +2,7 @@
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
-; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-known-unrepresentative-vtables='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -icp-ignored-base-types='Base1,Derived3' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
More information about the llvm-commits
mailing list