[llvm] 34f0edd - [TypeProf][PGO]Support skipping vtable comparisons for a class and its derived ones (#110575)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 2 10:23:57 PDT 2024
Author: Mingming Liu
Date: 2024-10-02T10:23:54-07:00
New Revision: 34f0edd50992e6d18c80dd901caf1e8220be673b
URL: https://github.com/llvm/llvm-project/commit/34f0edd50992e6d18c80dd901caf1e8220be673b
DIFF: https://github.com/llvm/llvm-project/commit/34f0edd50992e6d18c80dd901caf1e8220be673b.diff
LOG: [TypeProf][PGO]Support skipping vtable comparisons for a class and its derived ones (#110575)
Performance-critical core libraries can be highly optimized for arch
or micro-arch features. For instance, the absl crc library specializes
different templated classes for different hardware [1]. In a
practical setting, it's likely that instrumented profiles are collected
on one type of machine and used to optimize binaries that run on
multiple types of hardware.
While this kind of specialization is rare in terms of lines of code,
the compiler can do a better job by skipping vtable-based ICP.
* The per-class `Extend` implementation is arch-specific as well. If an
instrumented profile is collected on one arch and applied to another
arch where the `Extend` implementation is different, `Extend` might be
regarded as an unlikely function in the latter case. The `ABSL_ATTRIBUTE_HOT`
annotation alleviates the problem by putting all `Extend` implementations
into the hot text section [2].
This change introduces a comma-separated list to specify the mangled
vtable names, and ICP pass will skip vtable-based comparison if a vtable
variable definition is shown to be in its class hierarchy (per LLVM type
metadata).
[1]
https://github.com/abseil/abseil-cpp/blob/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L621-L650
[2]
https://github.com/abseil/abseil-cpp/blame/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L370C3-L370C21
Added:
Modified:
llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 76d60337086146..86637109d94083 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -132,6 +132,15 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
cl::desc("The maximum number of vtable for the last candidate."));
+static cl::list<std::string> ICPIgnoredBaseTypes(
+ "icp-ignored-base-types", cl::Hidden,
+ cl::desc(
+ "A list of mangled vtable type info names. Classes specified by the "
+ "type info names and their derived ones will not be vtable-ICP'ed. "
+ "Useful when the profiled types and actual types in the optimized "
+ "binary could be different due to profiling limitations. Type info "
+ "names are those string literals used in LLVM type metadata"));
+
namespace {
// The key is a vtable global variable, and the value is a map.
@@ -316,6 +325,8 @@ class IndirectCallPromoter {
OptimizationRemarkEmitter &ORE;
+ const DenseSet<StringRef> &IgnoredBaseTypes;
+
// A struct that records the direct target and it's call count.
struct PromotionCandidate {
Function *const TargetFunction;
@@ -366,6 +377,10 @@ class IndirectCallPromoter {
bool isProfitableToCompareVTables(const CallBase &CB,
ArrayRef<PromotionCandidate> Candidates);
+ // Return true if the vtable corresponding to VTableGUID should be skipped
+ // for vtable-based comparison.
+ bool shouldSkipVTable(uint64_t VTableGUID);
+
// Given an indirect callsite and the list of function candidates, compute
// the following vtable information in output parameters and return vtable
// pointer if type profiles exist.
@@ -391,10 +406,12 @@ class IndirectCallPromoter {
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+ const DenseSet<StringRef> &IgnoredBaseTypes,
OptimizationRemarkEmitter &ORE)
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
- VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
+ IgnoredBaseTypes(IgnoredBaseTypes) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -851,9 +868,14 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
LLVM_DEBUG(dbgs() << "\n");
uint64_t CandidateVTableCount = 0;
- for (auto &[GUID, Count] : VTableGUIDAndCounts)
+
+ for (auto &[GUID, Count] : VTableGUIDAndCounts) {
CandidateVTableCount += Count;
+ if (shouldSkipVTable(GUID))
+ return false;
+ }
+
if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
LLVM_DEBUG(
dbgs() << " function count " << Candidate.Count
@@ -883,6 +905,27 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
return true;
}
+bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
+ if (IgnoredBaseTypes.empty())
+ return false;
+
+ auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
+
+ assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
+
+ SmallVector<MDNode *, 2> Types;
+ VTableVar->getMetadata(LLVMContext::MD_type, Types);
+
+ for (auto *Type : Types)
+ if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
+ if (IgnoredBaseTypes.contains(TypeId->getString())) {
+ LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
+ "out of vtable comparison.");
+ return true;
+ }
+ return false;
+}
+
// For virtual calls in the module, collect per-callsite information which will
// be used to associate an ICP candidate with a vtable and a specific function
// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
@@ -956,9 +999,15 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool Changed = false;
VirtualCallSiteTypeInfoMap VirtualCSInfo;
- if (EnableVTableProfileUse)
+ DenseSet<StringRef> IgnoredBaseTypes;
+
+ if (EnableVTableProfileUse) {
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+ for (StringRef Str : ICPIgnoredBaseTypes)
+ IgnoredBaseTypes.insert(Str);
+ }
+
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
// address point of a given <vtable, address point offset> is static (doesn't
// change after being computed once).
@@ -977,7 +1026,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
- VTableAddressPointOffsetVal, ORE);
+ VTableAddressPointOffsetVal,
+ IgnoredBaseTypes, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index b6afce3d7c6d5d..84bb7a5830af2d 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -1,7 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Tests that ICP compares vtables by checking IR.
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; Require exactly one vtable candidate for each function candidate. Tests that ICP compares function by checking IR.
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions.
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
More information about the llvm-commits
mailing list