[llvm] 1518b26 - [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (#81442)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 29 23:21:37 PDT 2024
Author: Mingming Liu
Date: 2024-06-29T23:21:33-07:00
New Revision: 1518b260ce2cbd9286365709642dc749e542d683
URL: https://github.com/llvm/llvm-project/commit/1518b260ce2cbd9286365709642dc749e542d683
DIFF: https://github.com/llvm/llvm-project/commit/1518b260ce2cbd9286365709642dc749e542d683.diff
LOG: [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (#81442)
Clang's `-fwhole-program-vtables` is required for this optimization to
take place. If `-fwhole-program-vtables` is not enabled, this change is
no-op.
* Function-comparison (before):
```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @callee
br i1 %cond, label bb1, label bb2:
bb1:
call @callee
bb2:
call %func
```
* VTable-comparison (after):
```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @vtable-address-point
br i1 %cond, label bb1, label bb2:
bb1:
call @callee
bb2:
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
call %func
```
Key changes:
1. Find out virtual calls and the vtables they come from.
- The ICP relies on type intrinsic `llvm.type.test` to find out virtual
calls and the
compatible vtables, and relies on type metadata to find the address
point for comparison.
2. ICP pass does cost-benefit analysis and compares vtable only when the
number of vtables for a function candidate is within (option specified)
threshold.
3. Sink the function addressing and vtable load instruction to indirect
fallback.
- The sink helper functions are simplified versions of
`InstCombinerImpl::tryToSinkInstruction`. Currently debug intrinsics are
not handled. Ideally `InstCombinerImpl::tryToSinkInstructionDbgValues`
and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` could be
moved into Transforms/Utils/Local.cpp (or another util cpp file) to
handle debug intrinsics when moving instructions across basic blocks.
4. Keep value profiles updated
1) Update vtable value profiles after inline
2) For either function-based comparison or vtable-based comparison,
update both vtable and indirect call value profiles.
Added:
llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
Modified:
compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
llvm/include/llvm/Analysis/IndirectCallVisitor.h
llvm/include/llvm/ProfileData/InstrProf.h
llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
llvm/lib/ProfileData/InstrProf.cpp
llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
llvm/lib/Transforms/Utils/InlineFunction.cpp
llvm/test/Transforms/Inline/update_invoke_prof.ll
llvm/test/Transforms/Inline/update_value_profile.ll
Removed:
################################################################################
diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
index c23b2c77321c6..6cf73c6fdbd73 100644
--- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
+++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
@@ -5,59 +5,61 @@
// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
// UNSUPPORTED: ppc && host-byteorder-big-endian
-// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test
-// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test
+// RUN: rm -rf %t && mkdir %t && cd %t
+
+// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
+// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
// Show vtable profiles from raw profile.
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
// Generate indexed profile from raw profile and show the data.
-// RUN: llvm-profdata merge --keep-vtable-symbols %t-test.profraw -o %t-test.profdata
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge --keep-vtable-symbols test.profraw -o test.profdata
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
// Generate text profile from raw and indexed profiles respectively and show the data.
-// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profraw -o %t-raw.proftext
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT
-// RUN: llvm-profdata merge --keep-vtable-symbols --text %t-test.profdata -o %t-indexed.proftext
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT
+// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profraw -o raw.proftext
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT
+// RUN: llvm-profdata merge --keep-vtable-symbols --text test.profdata -o indexed.proftext
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT
// Generate indexed profile from text profiles and show the data
-// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-raw.proftext -o %t-text.profraw
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
-// RUN: llvm-profdata merge --keep-vtable-symbols --binary %t-indexed.proftext -o %t-text.profdata
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge --keep-vtable-symbols --binary raw.proftext -o text.profraw
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge --keep-vtable-symbols --binary indexed.proftext -o text.profdata
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
// COMMON: Counters:
// COMMON-NEXT: main:
-// COMMON-NEXT: Hash: 0x0f9a16fe6d398548
-// COMMON-NEXT: Counters: 2
+// COMMON-NEXT: Hash: 0x068617320ec408a0
+// COMMON-NEXT: Counters: 4
// COMMON-NEXT: Indirect Call Site Count: 2
// COMMON-NEXT: Number of instrumented vtables: 2
// RAW: Indirect Target Results:
-// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
-// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
-// RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
-// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
+// RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
+// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
+// RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
+// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
// RAW-NEXT: VTable Results:
-// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
-// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+// RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
+// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED: Indirect Target Results:
-// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
-// INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
-// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
-// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
+// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%)
+// INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%)
+// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%)
+// INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%)
// INDEXED-NEXT: VTable Results:
-// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
-// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
+// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%)
+// INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%)
// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
// COMMON: Instrumentation level: IR entry_first = 0
// COMMON-NEXT: Functions shown: 1
-// COMMON-NEXT: Total functions: 6
+// COMMON-NEXT: Total functions: 7
// COMMON-NEXT: Maximum function count: 1000
-// COMMON-NEXT: Maximum internal block count: 250
+// COMMON-NEXT: Maximum internal block count: 1000
// COMMON-NEXT: Statistics for indirect call sites profile:
// COMMON-NEXT: Total number of sites: 2
// COMMON-NEXT: Total number of sites with values: 2
@@ -76,11 +78,13 @@
// ICTEXT: :ir
// ICTEXT: main
// ICTEXT: # Func Hash:
-// ICTEXT: 1124236338992350536
+// ICTEXT: 470088714870327456
// ICTEXT: # Num Counters:
-// ICTEXT: 2
+// ICTEXT: 4
// ICTEXT: # Counter Values:
// ICTEXT: 1000
+// ICTEXT: 1000
+// ICTEXT: 200
// ICTEXT: 1
// ICTEXT: # Num Value Kinds:
// ICTEXT: 2
@@ -89,41 +93,98 @@
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750
-// ICTEXT: _ZN8Derived15func1Eii:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
+// ICTEXT: _ZN8Derived14funcEii:50
// ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750
-// ICTEXT: _ZN8Derived15func2Eii:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
+// ICTEXT: _ZN8Derived1D0Ev:250
// ICTEXT: # ValueKind = IPVK_VTableTarget:
// ICTEXT: 2
// ICTEXT: # NumValueSites:
// ICTEXT: 2
// ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
-// ICTEXT: _ZTV8Derived1:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
+// ICTEXT: _ZTV8Derived1:50
// ICTEXT: 2
// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
// ICTEXT: _ZTV8Derived1:250
+// Test indirect call promotion transformation using vtable profiles.
+// - Build with `-g` to enable debug information.
+// - In real world settings, ICP pass is disabled in prelink pipeline. In
+// the postlink pipeline, ICP is enabled after whole-program-devirtualization
+// pass. Do the same thing in this test.
+// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics.
+// - Enable `-fno-split-lto-unit` and `-Wl,--lto-whole-program-visibility` to
+// preserve type intrinsics for ICP pass.
+// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \
+// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \
+// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
+// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
+// RUN: -mllvm -enable-vtable-profile-use \
+// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
+// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
+// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
+
+// For the indirect call site `ptr->func`
+// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
+// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
+//
+// For the indirect call site `delete ptr`
+// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E}
+// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1}
+
+// The IR matchers for indirect callsite `ptr->func`.
+// IR-LABEL: @main
+// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
+// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
+// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
+// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
+//
+// IR: [[BB1]]:
+// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
+// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
+//
+// IR: [[BB2]]:
+// IR: [[CMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV8Derived1, i32 16)
+// IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]],
+//
+// IR: [[BB3]]:
+// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii
+// IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]],
+//
+// IR: [[BB4]]:
+// IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]]
+// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]]
+// IR: br label %[[MERGE1]]
+//
+// IR: [[MERGE1]]:
+// IR: [[RES1:%.*]] = phi i32 [ [[RESBB4]], %[[BB4]] ], [ [[RESBB3]], %[[BB3]] ]
+// IR: br label %[[MERGE0]]
+//
+// IR: [[MERGE0]]:
+// IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ]
#include <cstdio>
#include <cstdlib>
class Base {
public:
- virtual int func1(int a, int b) = 0;
- virtual int func2(int a, int b) = 0;
+ virtual int func(int a, int b) = 0;
+
+ virtual ~Base() {};
};
class Derived1 : public Base {
public:
- int func1(int a, int b) override { return a + b; }
+ int func(int a, int b) override { return a * b; }
- int func2(int a, int b) override { return a * b; }
+ ~Derived1() {}
};
namespace {
class Derived2 : public Base {
public:
- int func1(int a, int b) override { return a - b; }
+ int func(int a, int b) override { return a * (a - b); }
- int func2(int a, int b) override { return a * (a - b); }
+ ~Derived2() {}
};
} // namespace
__attribute__((noinline)) Base *createType(int a) {
@@ -140,7 +201,10 @@ int main(int argc, char **argv) {
int a = rand();
int b = rand();
Base *ptr = createType(i);
- sum += ptr->func1(a, b) + ptr->func2(b, a);
+ if (i % 5 == 0)
+ sum += ptr->func(b, a);
+
+ delete ptr;
}
printf("sum is %d\n", sum);
return 0;
diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index e0e8a7cda9369..9c2be12fce2fb 100644
--- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -57,7 +57,7 @@ class ICallPromotionAnalysis {
///
/// The returned array space is owned by this class, and overwritten on
/// subsequent calls.
- ArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
+ MutableArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates);
};
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 66c972572b06c..6c424038070dc 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -37,8 +37,10 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
// A heuristic is used to find the address feeding instructions.
static Instruction *tryGetVTableInstruction(CallBase *CB) {
assert(CB != nullptr && "Caller guaranteed");
- LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
+ if (!CB->isIndirectCall())
+ return nullptr;
+ LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
if (LI != nullptr) {
Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast)
Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets();
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 7fa6d44990a14..50e6f1d3b9b1f 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -294,6 +294,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
uint64_t &TotalC, bool GetNoICPValue = false);
+// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read
+// of this metadata for backward compatibility and generating 'PGOName' only.
/// Extract the value profile data from \p Inst and returns them if \p Inst is
/// annotated with value profile data. Returns an empty vector otherwise.
SmallVector<InstrProfValueData, 4>
@@ -303,6 +305,8 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
+inline StringRef getPGONameMetadataName() { return "PGOName"; }
+
/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);
@@ -311,8 +315,14 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO = false);
/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
+/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this
+/// function.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
+/// Create the PGOName metadata if a global object's PGO name is different from
+/// its mangled name. This should apply to local-linkage global objects only.
+void createPGONameMetadata(GlobalObject &GO, StringRef PGOName);
+
/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const GlobalObject &GV, const Module &M);
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index a71ab23a30902..f43666f0037b6 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -87,7 +87,7 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
return I;
}
-ArrayRef<InstrProfValueData>
+MutableArrayRef<InstrProfValueData>
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) {
uint32_t NumVals;
@@ -95,9 +95,9 @@ ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
MaxNumPromotions, NumVals, TotalCount);
if (!Res) {
NumCandidates = 0;
- return ArrayRef<InstrProfValueData>();
+ return MutableArrayRef<InstrProfValueData>();
}
ValueDataArray = std::move(Res);
NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount);
- return ArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
+ return MutableArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
}
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index c7749f33d9af5..9dbaa2ca0f020 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -228,6 +228,12 @@ cl::opt<bool> EnableVTableValueProfiling(
"the types of a C++ pointer. The information is used in indirect "
"call promotion to do selective vtable-based comparison."));
+cl::opt<bool> EnableVTableProfileUse(
+ "enable-vtable-profile-use", cl::init(false),
+ cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable "
+ "profiles will be used by ICP pass for more efficient indirect "
+ "call sequence. If false, type profiles won't be used."));
+
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
Triple::ObjectFormatType OF,
bool AddSegmentInfo) {
@@ -391,7 +397,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) {
// PGONameMetadata should be set by compiler at profile use time
// and read by symtab creation to look up symbols corresponding to
// a MD5 hash.
- return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr);
+ return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName()));
}
// See getIRPGOObjectName() for a discription of the format.
@@ -480,8 +486,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
for (GlobalVariable &G : M.globals()) {
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
continue;
- if (Error E = addVTableWithName(
- G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ if (Error E = addVTableWithName(G, getPGOName(G, InLTO)))
return E;
}
@@ -1425,16 +1430,28 @@ MDNode *getPGOFuncNameMetadata(const Function &F) {
return F.getMetadata(getPGOFuncNameMetadataName());
}
-void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
- // Only for internal linkage functions.
- if (PGOFuncName == F.getName())
- return;
- // Don't create duplicated meta-data.
- if (getPGOFuncNameMetadata(F))
+static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName,
+ StringRef PGOName) {
+ // Only for internal linkage functions or global variables. The name is not
+ // the same as PGO name for these global objects.
+ if (GO.getName() == PGOName)
return;
- LLVMContext &C = F.getContext();
- MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName));
- F.setMetadata(getPGOFuncNameMetadataName(), N);
+
+ // Don't create duplicated metadata.
+ if (GO.getMetadata(MetadataName))
+ return;
+
+ LLVMContext &C = GO.getContext();
+ MDNode *N = MDNode::get(C, MDString::get(C, PGOName));
+ GO.setMetadata(MetadataName, N);
+}
+
+void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
+ return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName);
+}
+
+void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) {
+ return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName);
}
bool needsComdatForCounter(const GlobalObject &GO, const Module &M) {
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index fe9eaae9ac7ea..68f4544b82e1b 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,13 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -40,6 +43,7 @@
#include <cassert>
#include <cstdint>
#include <memory>
+#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -51,6 +55,12 @@ using namespace llvm;
STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
+namespace llvm {
+extern cl::opt<bool> EnableVTableProfileUse;
+}
+
// Command line option to disable indirect-call promotion with the default as
// false. This is for debug purpose.
static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
@@ -103,13 +113,196 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
+// Indirect call promotion pass will fall back to function-based comparison if
+// vtable-count / function-count is smaller than this threshold.
+static cl::opt<float> ICPVTablePercentageThreshold(
+ "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("The percentage threshold of vtable-count / function-count for "
+ "cost-benefit analysis."));
+
+// Although comparing vtables can save a vtable load, we may need to compare
+// vtable pointer with multiple vtable address points due to class inheritance.
+// Comparing with multiple vtables inserts additional instructions on hot code
+// path, and doing so for an earlier candidate delays the comparisons for later
+// candidates. For the last candidate, only the fallback path is affected.
+// We allow multiple vtable comparison for the last function candidate and use
+// the option below to cap the number of vtables.
+static cl::opt<int> ICPMaxNumVTableLastCandidate(
+ "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
+ cl::desc("The maximum number of vtable for the last candidate."));
+
namespace {
+// The key is a vtable global variable, and the value is a map.
+// In the inner map, the key represents address point offsets and the value is a
+// constant for this address point.
+using VTableAddressPointOffsetValMap =
+ SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>;
+
+// A struct to collect type information for a virtual call site.
+struct VirtualCallSiteInfo {
+ // The offset from the address point to virtual function in the vtable.
+ uint64_t FunctionOffset;
+ // The instruction that computes the address point of vtable.
+ Instruction *VPtr;
+ // The compatible type used in LLVM type intrinsics.
+ StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+ SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
+
+// The key is vtable GUID, and value is its value profile count.
+using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
+
+// Return the address point offset of the given compatible type.
+//
+// Type metadata of a vtable specifies the types that can contain a pointer to
+// this vtable, for example, `Base*` can be a pointer to a derived type
+// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
+static std::optional<uint64_t>
+getAddressPointOffset(const GlobalVariable &VTableVar,
+ StringRef CompatibleType) {
+ SmallVector<MDNode *> Types;
+ VTableVar.getMetadata(LLVMContext::MD_type, Types);
+
+ for (MDNode *Type : Types)
+ if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
+ TypeId && TypeId->getString() == CompatibleType)
+ return cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ return std::nullopt;
+}
+
+// Return a constant representing the vtable's address point specified by the
+// offset.
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+ uint32_t AddressPointOffset) {
+ Module &M = *VTable->getParent();
+ LLVMContext &Context = M.getContext();
+ assert(AddressPointOffset <
+ M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
+ "Out-of-bound access");
+
+ return ConstantExpr::getInBoundsGetElementPtr(
+ Type::getInt8Ty(Context), VTable,
+ llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
+}
+
+// Return the basic block in which Use `U` is used via its `UserInst`.
+static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ return PN->getIncomingBlock(U);
+
+ return UserInst->getParent();
+}
+
+// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` has users
+// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
+// is the sole predecessor of `DestBB` and `DestBB` is dominated by
+// `Inst->getParent()`.
+static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
+ // 'BB' is used only by assert.
+ [[maybe_unused]] BasicBlock *BB = Inst->getParent();
+
+ assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
+ DestBB->getUniquePredecessor() == BB &&
+ "Guaranteed by ICP transformation");
+
+ BasicBlock *UserBB = nullptr;
+ for (Use &Use : Inst->uses()) {
+ User *User = Use.getUser();
+ // Do checked cast since IR verifier guarantees that the user of an
+ // instruction must be an instruction. See `Verifier::visitInstruction`.
+ Instruction *UserInst = cast<Instruction>(User);
+ // We can sink debug or pseudo instructions together with Inst.
+ if (UserInst->isDebugOrPseudoInst())
+ continue;
+ UserBB = getUserBasicBlock(Use, UserInst);
+ // Do not sink if Inst is used in a basic block that is not DestBB.
+ // TODO: Sink to the common dominator of all user blocks.
+ if (UserBB != DestBB)
+ return false;
+ }
+ return UserBB != nullptr;
+}
+
+// For the virtual call dispatch sequence, try to sink vtable load instructions
+// to the cold indirect call fallback.
+// FIXME: Move the sink eligibility check below to a utility function in
+// Transforms/Utils/ directory.
+static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+ if (!isDestBBSuitableForSink(I, DestBlock))
+ return false;
+
+ // Do not move control-flow-involving, volatile loads, vaarg, alloca
+ // instructions, etc.
+ if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
+ isa<AllocaInst>(I))
+ return false;
+
+ // Do not sink convergent call instructions.
+ if (const auto *C = dyn_cast<CallBase>(I))
+ if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
+ return false;
+
+ // Do not move an instruction that may write to memory.
+ if (I->mayWriteToMemory())
+ return false;
+
+ // We can only sink load instructions if there is nothing between the load and
+ // the end of block that could change the value.
+ if (I->mayReadFromMemory()) {
+ // We already know that SrcBlock is the unique predecessor of DestBlock.
+ for (BasicBlock::iterator Scan = std::next(I->getIterator()),
+ E = I->getParent()->end();
+ Scan != E; ++Scan) {
+ // Note alias analysis can tell whether two pointers can point to the
+ // same object in memory or not thereby find further opportunities to
+ // sink.
+ if (Scan->mayWriteToMemory())
+ return false;
+ }
+ }
+
+ BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
+ I->moveBefore(*DestBlock, InsertPos);
+
+ // TODO: Sink debug intrinsic users of I to 'DestBlock'.
+ // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
+ // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
+ // the core logic to do this.
+ return true;
+}
+
+// Try to sink instructions after VPtr to the indirect call fallback.
+// Return the number of sunk IR instructions.
+static int tryToSinkInstructions(BasicBlock *OriginalBB,
+ BasicBlock *IndirectCallBB) {
+ int SinkCount = 0;
+ // Do not sink across a critical edge for simplicity.
+ if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
+ return SinkCount;
+ // Sink all eligible instructions in OriginalBB in reverse order.
+ for (Instruction &I :
+ llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB))))
+ if (tryToSinkInstruction(&I, IndirectCallBB))
+ SinkCount++;
+
+ return SinkCount;
+}
+
// Promote indirect calls to conditional direct calls, keeping track of
// thresholds.
class IndirectCallPromoter {
private:
Function &F;
+ Module &M;
+
+ ProfileSummaryInfo *PSI = nullptr;
// Symtab that maps indirect call profile values to function names and
// defines.
@@ -117,6 +310,11 @@ class IndirectCallPromoter {
const bool SamplePGO;
+ // A map from a virtual call to its type information.
+ const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
+
+ VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
+
OptimizationRemarkEmitter &ORE;
// A struct that records the direct target and its call count.
@@ -124,6 +322,16 @@ class IndirectCallPromoter {
Function *const TargetFunction;
const uint64_t Count;
+ // The following fields only exist for promotion candidates with vtable
+ // information.
+ //
+ // Due to class inheritance, one virtual call candidate can come from
+ // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
+ // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
+ // points for comparison.
+ VTableGUIDCountsMap VTableGUIDAndCounts;
+ SmallVector<Constant *> AddressPoints;
+
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
};
@@ -137,18 +345,60 @@ class IndirectCallPromoter {
uint64_t TotalCount, uint32_t NumCandidates);
// Promote a list of targets for one indirect-call callsite by comparing
- // indirect callee with functions. Returns true if there are IR
+ // indirect callee with functions. Return true if there are IR
// transformations and false otherwise.
- bool tryToPromoteWithFuncCmp(CallBase &CB,
+ bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
ArrayRef<PromotionCandidate> Candidates,
uint64_t TotalCount,
ArrayRef<InstrProfValueData> ICallProfDataRef,
- uint32_t NumCandidates);
+ uint32_t NumCandidates,
+ VTableGUIDCountsMap &VTableGUIDCounts);
+
+ // Promote a list of targets for one indirect call by comparing vtables with
+ // functions. Return true if there are IR transformations and false
+ // otherwise.
+ bool tryToPromoteWithVTableCmp(
+ CallBase &CB, Instruction *VPtr,
+ const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalFuncCount, uint32_t NumCandidates,
+ MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ VTableGUIDCountsMap &VTableGUIDCounts);
+
+ // Return true if it's profitable to compare vtables for the callsite.
+ bool isProfitableToCompareVTables(
+ const CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalCount);
+
+ // Given an indirect callsite and the list of function candidates, compute
+ // the following vtable information in output parameters and return vtable
+ // pointer if type profiles exist.
+ // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
+ // metadata attached on the vtable pointer.
+ // - For each function candidate, finds out the vtables from which it gets
+ // called and stores the <vtable-guid, count> in promotion candidate.
+ Instruction *computeVTableInfos(const CallBase *CB,
+ VTableGUIDCountsMap &VTableGUIDCounts,
+ std::vector<PromotionCandidate> &Candidates);
+
+ Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
+ uint64_t AddressPointOffset);
+
+ void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
+ uint64_t Sum, uint32_t MaxMDCount);
+
+ void updateVPtrValueProfiles(Instruction *VPtr,
+ VTableGUIDCountsMap &VTableGUIDCounts);
public:
- IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
- OptimizationRemarkEmitter &ORE)
- : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+ IndirectCallPromoter(
+ Function &Func, Module &M, ProfileSummaryInfo *PSI,
+ InstrProfSymtab *Symtab, bool SamplePGO,
+ const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
+ VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+ OptimizationRemarkEmitter &ORE)
+ : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
+ VirtualCSInfo(VirtualCSInfo),
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -244,25 +494,127 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
return Ret;
}
+Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
+ GlobalVariable *GV, uint64_t AddressPointOffset) {
+ auto [Iter, Inserted] =
+ VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
+ if (Inserted)
+ Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
+ return Iter->second;
+}
+
+Instruction *IndirectCallPromoter::computeVTableInfos(
+ const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
+ std::vector<PromotionCandidate> &Candidates) {
+ if (!EnableVTableProfileUse)
+ return nullptr;
+
+ // Take the following code sequence as an example, here is how the code works
+ // @vtable1 = {[n x ptr] [... ptr @func1]}
+ // @vtable2 = {[m x ptr] [... ptr @func2]}
+ //
+ // %vptr = load ptr, ptr %d, !prof !0
+ // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
+ // tail call void @llvm.assume(i1 %0)
+ // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
+ // %1 = load ptr, ptr %vfn
+ // call void %1(ptr %d), !prof !1
+ //
+ // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
+ // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
+ //
+ // Step 1. Find out the %vptr instruction for indirect call and use its !prof
+ // to populate `GUIDCountsMap`.
+ // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
+ // make vtable definitions visible across modules.
+ // Step 3. Compute the byte offset of the virtual call, by adding vtable
+ // address point offset and function's offset relative to vtable address
+ // point. For each function candidate, this step tells us the vtable from
+ // which it comes, and the vtable address point to compare %vptr with.
+
+ // Only virtual calls have virtual call site info.
+ auto Iter = VirtualCSInfo.find(CB);
+ if (Iter == VirtualCSInfo.end())
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
+ << NumOfPGOICallsites << "\n");
+
+ const auto &VirtualCallInfo = Iter->second;
+ Instruction *VPtr = VirtualCallInfo.VPtr;
+
+ SmallDenseMap<Function *, int, 4> CalleeIndexMap;
+ for (size_t I = 0; I < Candidates.size(); I++)
+ CalleeIndexMap[Candidates[I].TargetFunction] = I;
+
+ uint32_t ActualNumValueData = 0;
+ uint64_t TotalVTableCount = 0;
+ auto VTableValueDataArray = getValueProfDataFromInst(
+ *VirtualCallInfo.VPtr, IPVK_VTableTarget, MaxNumVTableAnnotations,
+ ActualNumValueData, TotalVTableCount);
+ if (VTableValueDataArray.get() == nullptr)
+ return VPtr;
+
+ // Compute the functions and counts contributed by each vtable.
+ for (size_t j = 0; j < ActualNumValueData; j++) {
+ uint64_t VTableVal = VTableValueDataArray[j].Value;
+ GUIDCountsMap[VTableVal] = VTableValueDataArray[j].Count;
+ GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
+ if (!VTableVar) {
+ LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
+ << "; maybe the vtable isn't imported\n");
+ continue;
+ }
+
+ std::optional<uint64_t> MaybeAddressPointOffset =
+ getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
+ if (!MaybeAddressPointOffset)
+ continue;
+
+ const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
+
+ Function *Callee = nullptr;
+ std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
+ VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
+ if (!Callee)
+ continue;
+ auto CalleeIndexIter = CalleeIndexMap.find(Callee);
+ if (CalleeIndexIter == CalleeIndexMap.end())
+ continue;
+
+ auto &Candidate = Candidates[CalleeIndexIter->second];
+ // There shouldn't be duplicate GUIDs in one !prof metadata (except
+ // duplicated zeros), so assign counters directly won't cause overwrite or
+ // counter loss.
+ Candidate.VTableGUIDAndCounts[VTableVal] = VTableValueDataArray[j].Count;
+ Candidate.AddressPoints.push_back(
+ getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
+ }
+
+ return VPtr;
+}
+
+// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
+// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
+static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
+ uint64_t FalseWeight) {
+ MDBuilder MDB(Context);
+ uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
+ return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
+ scaleBranchCount(FalseWeight, Scale));
+}
+
CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
uint64_t Count, uint64_t TotalCount,
bool AttachProfToDirectCall,
OptimizationRemarkEmitter *ORE) {
+ CallBase &NewInst = promoteCallWithIfThenElse(
+ CB, DirectCallee,
+ createBranchWeights(CB.getContext(), Count, TotalCount - Count));
- uint64_t ElseCount = TotalCount - Count;
- uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
- uint64_t Scale = calculateCountScale(MaxCount);
- MDBuilder MDB(CB.getContext());
- MDNode *BranchWeights = MDB.createBranchWeights(
- scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
-
- CallBase &NewInst =
- promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
-
- if (AttachProfToDirectCall) {
+ if (AttachProfToDirectCall)
setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
/*IsExpected=*/false);
- }
using namespace ore;
@@ -278,34 +630,175 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
// Promote indirect-call to conditional direct-call for one callsite.
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
- CallBase &CB, ArrayRef<PromotionCandidate> Candidates, uint64_t TotalCount,
- ArrayRef<InstrProfValueData> ICallProfDataRef, uint32_t NumCandidates) {
+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
+ uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
uint32_t NumPromoted = 0;
for (const auto &C : Candidates) {
- uint64_t Count = C.Count;
- pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
- &ORE);
- assert(TotalCount >= Count);
- TotalCount -= Count;
+ uint64_t FuncCount = C.Count;
+ pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
+ SamplePGO, &ORE);
+ assert(TotalCount >= FuncCount);
+ TotalCount -= FuncCount;
NumOfPGOICallPromotion++;
NumPromoted++;
- }
+ if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
+ continue;
+
+ // After a virtual call candidate gets promoted, update the vtable's counts
+ // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
+ // a vtable from which the virtual call is loaded. Compute the sum and use
+ // 128-bit APInt to improve accuracy.
+ uint64_t SumVTableCount = 0;
+ for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
+ SumVTableCount += VTableCount;
+
+ for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
+ APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
+ APFuncCount *= VTableCount;
+ VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
+ }
+ }
if (NumPromoted == 0)
return false;
- // Adjust the MD.prof metadata. First delete the old one.
- CB.setMetadata(LLVMContext::MD_prof, nullptr);
-
assert(NumPromoted <= ICallProfDataRef.size() &&
"Number of promoted functions should not be greater than the number "
"of values in profile metadata");
+
+ // Update value profiles on the indirect call.
+ updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
+ NumCandidates);
+ updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
+ return true;
+}
+
+void IndirectCallPromoter::updateFuncValueProfiles(
+ CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
+ uint32_t MaxMDCount) {
+ // First clear the existing !prof.
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
// Annotate the remaining value profiles if counter is not zero.
if (TotalCount != 0)
- annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
- TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+ annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+ MaxMDCount);
+}
+
+void IndirectCallPromoter::updateVPtrValueProfiles(
+ Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
+ if (!EnableVTableProfileUse || VPtr == nullptr ||
+ !VPtr->getMetadata(LLVMContext::MD_prof))
+ return;
+ VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+ std::vector<InstrProfValueData> VTableValueProfiles;
+ uint64_t TotalVTableCount = 0;
+ for (auto [GUID, Count] : VTableGUIDCounts) {
+ if (Count == 0)
+ continue;
+
+ VTableValueProfiles.push_back({GUID, Count});
+ TotalVTableCount += Count;
+ }
+ llvm::sort(VTableValueProfiles,
+ [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ });
+
+ annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+ IPVK_VTableTarget, VTableValueProfiles.size());
+}
+
+// Promote every candidate of the indirect call `CB` by comparing `VPtr` with
+// the candidate's vtable address points. After each promotion, try to sink
+// instructions from the original block into the indirect fallback block and
+// emit an optimization remark. Finally rewrite the value profiles on `CB` and
+// `VPtr`. Return true if at least one candidate is promoted.
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+ CallBase &CB, Instruction *VPtr,
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalFuncCount,
+ uint32_t NumCandidates,
+ MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ VTableGUIDCountsMap &VTableGUIDCounts) {
+ SmallVector<uint64_t, 4> PromotedFuncCount;
+
+ for (const auto &Candidate : Candidates) {
+ // The vtables compared for this candidate no longer reach the indirect
+ // fallback, so remove their counts from the remaining vtable profile.
+ for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
+ VTableGUIDCounts[GUID] -= Count;
+
+ // 'OriginalBB' is the basic block of indirect call. After each candidate
+ // is promoted, a new basic block is created for the indirect fallback basic
+ // block and indirect call `CB` is moved into this new BB.
+ BasicBlock *OriginalBB = CB.getParent();
+ promoteCallWithVTableCmp(
+ CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+ createBranchWeights(CB.getContext(), Candidate.Count,
+ TotalFuncCount - Candidate.Count));
+
+ int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
+
+ ORE.emit([&]() {
+ OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
+
+ const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
+ Remark << "Promote indirect call to "
+ << ore::NV("DirectCallee", Candidate.TargetFunction)
+ << " with count " << ore::NV("Count", Candidate.Count)
+ << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
+ << ore::NV("SinkCount", SinkCount)
+ << " instruction(s) and compare "
+ << ore::NV("VTable", VTableGUIDAndCounts.size())
+ << " vtable(s): {";
+
+ // Sort GUIDs so remark message is deterministic.
+ std::set<uint64_t> GUIDSet;
+ for (auto [GUID, Count] : VTableGUIDAndCounts)
+ GUIDSet.insert(GUID);
+ for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
+ if (Iter != GUIDSet.begin())
+ Remark << ", ";
+ Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
+ }
+
+ Remark << "}";
+
+ return Remark;
+ });
+
+ PromotedFuncCount.push_back(Candidate.Count);
+
+ assert(TotalFuncCount >= Candidate.Count &&
+ "Within one prof metadata, total count is the sum of counts from "
+ "individual <target, count> pairs");
+ // Use std::min since 'TotalFuncCount' is the saturated sum of individual
+ // counts, see
+ // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
+ TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
+ NumOfPGOICallPromotion++;
+ }
+ if (PromotedFuncCount.empty())
+ return false;
+
+ // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
+ // distinct 'VPtr'.
+ // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
+ // used to load multiple virtual functions. The vtable profiles need to be
+ // updated properly in that case (e.g, for each indirect call annotate both
+ // type profiles and function profiles in one !prof).
+ // Use std::min so the unsigned subtraction saturates at zero instead of
+ // wrapping around when the promoted count exceeds the recorded count.
+ for (size_t I = 0; I < PromotedFuncCount.size(); I++)
+ ICallProfDataRef[I].Count -=
+ std::min(PromotedFuncCount[I], ICallProfDataRef[I].Count);
+ // Sort value profiles by count in descending order.
+ llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
+ const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ });
+ // Drop the <target-value, count> pair if count is zero.
+ ArrayRef<InstrProfValueData> VDs(
+ ICallProfDataRef.begin(),
+ llvm::upper_bound(ICallProfDataRef, 0U,
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
+ return ProfData.Count <= Count;
+ }));
+ updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
+ updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
return true;
}
@@ -322,14 +815,151 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
if (!NumCandidates ||
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
continue;
+
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
- Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
- ICallProfDataRef, NumCandidates);
+
+ VTableGUIDCountsMap VTableGUIDCounts;
+ Instruction *VPtr =
+ computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
+
+ if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount))
+ Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
+ TotalCount, NumCandidates,
+ ICallProfDataRef, VTableGUIDCounts);
+ else
+ Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
+ TotalCount, ICallProfDataRef,
+ NumCandidates, VTableGUIDCounts);
}
return Changed;
}
+// TODO: Return false if the function addressing and vtable load instructions
+// cannot sink to indirect fallback.
+bool IndirectCallPromoter::isProfitableToCompareVTables(
+ const CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalCount) {
+ if (!EnableVTableProfileUse || Candidates.empty())
+ return false;
+ LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
+ << NumOfPGOICallsites << CB << "\n");
+ uint64_t RemainingVTableCount = TotalCount;
+ const size_t CandidateSize = Candidates.size();
+ for (size_t I = 0; I < CandidateSize; I++) {
+ auto &Candidate = Candidates[I];
+ auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
+
+ LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: "
+ << Candidate.Count << ", VTableCounts:");
+ // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG.
+ for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts)
+ LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName()
+ << ", " << Count << "}");
+ LLVM_DEBUG(dbgs() << "\n");
+
+ uint64_t CandidateVTableCount = 0;
+ for (auto &[GUID, Count] : VTableGUIDAndCounts)
+ CandidateVTableCount += Count;
+
+ if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
+ LLVM_DEBUG(
+ dbgs() << " function count " << Candidate.Count
+ << " and its vtable sum count " << CandidateVTableCount
+ << " have discrepancies. Bail out vtable comparison.\n");
+ return false;
+ }
+
+ RemainingVTableCount -= Candidate.Count;
+
+ // 'MaxNumVTable' limits the number of vtables to make vtable comparison
+ // profitable. Comparing multiple vtables for one function candidate will
+ // insert additional instructions on the hot path, and allowing more than
+ // one vtable for non last candidates may or may not elongate the dependency
+ // chain for the subsequent candidates. Set its value to 1 for non-last
+ // candidate and allow option to override it for the last candidate.
+ int MaxNumVTable = 1;
+ if (I == CandidateSize - 1)
+ MaxNumVTable = ICPMaxNumVTableLastCandidate;
+
+ if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
+ LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
+ << Candidate.AddressPoints.size()
+ << " vtables. Bail out for vtable comparison.\n");
+ return false;
+ }
+ }
+
+ // If the indirect fallback is not cold, don't compare vtables.
+ if (PSI && PSI->hasProfileSummary() &&
+ !PSI->isColdCount(RemainingVTableCount)) {
+ LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail "
+ "out for vtable comparison.\n");
+ return false;
+ }
+
+ return true;
+}
+
+// For virtual calls in the module, collect per-callsite information which will
+// be used to associate an ICP candidate with a vtable and a specific function
+// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
+// calls in a compile-time efficient manner (by iterating its users) and more
+// importantly use the compatible type later to figure out the function byte
+// offset relative to the start of vtables.
+//
+// Each entry written to `VirtualCSInfo` is keyed by the call instruction and
+// holds the function offset, the instruction that loads the vtable pointer,
+// and the compatible type string.
+static void
+computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
+ VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
+ // Right now only llvm.type.test is used to find out virtual call sites.
+ // With ThinLTO and whole-program-devirtualization, llvm.type.test and
+ // llvm.public.type.test are emitted, and llvm.public.type.test is either
+ // refined to llvm.type.test or dropped before indirect-call-promotion pass.
+ //
+ // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
+ // Find out virtual calls by looking at users of llvm.type.checked.load in
+ // that case.
+ Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+ if (!TypeTestFunc || TypeTestFunc->use_empty())
+ return;
+
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ // Iterate all type.test calls to find all indirect calls.
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
+ if (!CI)
+ continue;
+ // Use dyn_cast (not cast) so that an unexpected operand kind is skipped by
+ // the null check below instead of tripping an assertion.
+ auto *TypeMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeMDVal)
+ continue;
+ auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!CompatibleTypeId)
+ continue;
+
+ // Find out all devirtualizable call sites given a llvm.type.test
+ // intrinsic call.
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<CallInst *, 1> Assumes;
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+
+ for (auto &DevirtCall : DevirtCalls) {
+ CallBase &CB = DevirtCall.CB;
+ // Given an indirect call, try to find the instruction which loads a
+ // pointer to virtual table.
+ Instruction *VTablePtr =
+ PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
+ if (!VTablePtr)
+ continue;
+ VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
+ CompatibleTypeId->getString()};
+ }
+ }
+}
+
// A wrapper function that does the actual work.
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool SamplePGO, ModuleAnalysisManager &MAM) {
@@ -342,6 +972,20 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
return false;
}
bool Changed = false;
+ VirtualCallSiteTypeInfoMap VirtualCSInfo;
+
+ if (EnableVTableProfileUse)
+ computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+
+ // VTableAddressPointOffsetVal stores the vtable address points. The vtable
+ // address point of a given <vtable, address point offset> is static (doesn't
+ // change after being computed once).
+ // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
+ // entry the first time a <vtable, offset> pair is seen, as
+ // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
+ // repeatedly on each function.
+ VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
+
for (auto &F : M) {
if (F.isDeclaration() || F.hasOptNone())
continue;
@@ -350,7 +994,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
+ IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
+ VirtualCSInfo,
+ VTableAddressPointOffsetVal, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 572d37a2b3e55..d10b58b17f42f 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -320,6 +320,8 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
cl::desc("Do not instrument functions with the number of critical edges "
" greater than this threshold."));
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
@@ -332,6 +334,7 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
// Command line option to enable vtable value profiling. Defined in
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
+extern cl::opt<bool> EnableVTableProfileUse;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
@@ -1728,6 +1731,14 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
llvm_unreachable("Unknown visiting mode");
}
+static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {
+ if (ValueProfKind == IPVK_MemOPSize)
+ return MaxNumMemOPAnnotations;
+ if (ValueProfKind == llvm::IPVK_VTableTarget)
+ return MaxNumVTableAnnotations;
+ return MaxNumAnnotations;
+}
+
// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
if (isValueProfilingDisabled())
@@ -1762,10 +1773,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
<< "): Index = " << ValueSiteIndex << " out of "
<< NumValueSites << "\n");
- annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
- static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
- Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
- : MaxNumAnnotations);
+ annotateValueSite(
+ *M, *I.AnnotatedInst, ProfileRecord,
+ static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
+ getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
ValueSiteIndex++;
}
}
@@ -2054,6 +2065,16 @@ static bool annotateAllFunctions(
return false;
}
+ if (EnableVTableProfileUse) {
+ for (GlobalVariable &G : M.globals()) {
+ if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
+ continue;
+
+ // Create the PGO name metadata for the global variable.
+ createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
+ }
+ }
+
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the
// function should be marked with a cold or inlinehint attribute).
@@ -2229,7 +2250,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
};
auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
-
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f2130e4c286aa..0725addfbb90a 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
@@ -56,6 +57,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -1976,16 +1978,28 @@ void llvm::updateProfileCallee(
? 0
: PriorEntryCount + EntryDelta;
+ auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
+ const uint64_t PriorEntryCount) {
+ Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
+ if (VPtr)
+ scaleProfData(*VPtr, NewEntryCount, PriorEntryCount);
+ };
+
// During inlining ?
if (VMap) {
uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
for (auto Entry : *VMap) {
if (isa<CallInst>(Entry.first))
- if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
+ }
+
if (isa<InvokeInst>(Entry.first))
- if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second))
+ if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) {
II->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
+ }
}
}
@@ -1996,10 +2010,14 @@ void llvm::updateProfileCallee(
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
for (Instruction &I : BB) {
- if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
CI->updateProfWeight(NewEntryCount, PriorEntryCount);
- if (InvokeInst *II = dyn_cast<InvokeInst>(&I))
+ updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
+ }
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
II->updateProfWeight(NewEntryCount, PriorEntryCount);
+ updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
+ }
}
}
}
diff --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll
index f6b86dfe5bb1b..12eb7dbf418c5 100644
--- a/llvm/test/Transforms/Inline/update_invoke_prof.ll
+++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll
@@ -1,6 +1,7 @@
-; Test that branch weights and value profiles associated with invoke are updated
-; in both caller and callee after inline, but invoke instructions with taken or
-; not taken branch probabilities are not updated.
+; Tests that instructions with value profiles and count-type branch weights are
+; updated in both caller and callee after inline, but invoke instructions with
+; taken or not taken branch probabilities are not updated.
+
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
declare i32 @__gxx_personality_v0(...)
@@ -15,21 +16,23 @@ declare void @callee1(ptr %func)
declare void @callee2(ptr %func)
-define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
+define void @callee(ptr %obj) personality ptr @__gxx_personality_v0 !prof !17 {
+ %vtable = load ptr, ptr %obj, !prof !21
+ %func = load ptr, ptr %vtable
invoke void %func()
- to label %next unwind label %lpad, !prof !18
+ to label %next unwind label %lpad, !prof !18
next:
invoke void @callee1(ptr %func)
- to label %cont unwind label %lpad, !prof !19
+ to label %cont unwind label %lpad, !prof !19
cont:
invoke void @callee2(ptr %func)
- to label %ret unwind label %lpad, !prof !20
+ to label %ret unwind label %lpad, !prof !20
lpad:
%exn = landingpad {ptr, i32}
- cleanup
+ cleanup
unreachable
ret:
@@ -57,26 +60,41 @@ ret:
!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
!19 = !{!"branch_weights", i32 1500}
!20 = !{!"branch_weights", i32 1234, i32 5678}
+!21 = !{!"VP", i32 2, i64 1500, i64 789, i64 900, i64 321, i64 600}
-; CHECK-LABEL: @caller(
-; CHECK: invoke void %func(
-; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]
-; CHECK: invoke void @callee1(
-; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]]
-; CHECK: invoke void @callee2(
-; CHECK-NEXT: {{.*}} !prof ![[PROF3:[0-9]+]]
-
-; CHECK-LABL: @callee(
-; CHECK: invoke void %func(
-; CHECK-NEXT: {{.*}} !prof ![[PROF4:[0-9]+]]
-; CHECK: invoke void @callee1(
-; CHECK-NEXT: {{.*}} !prof ![[PROF5:[0-9]+]]
-; CHECK: invoke void @callee2(
-; CHECK-NEXT: {{.*}} !prof ![[PROF3]]
+; CHECK-LABEL: define void @caller(
+; CHECK-SAME: ptr [[FUNC:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF14:![0-9]+]] {
+; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[FUNC]], align 8, !prof [[PROF15:![0-9]+]]
+; CHECK-NEXT: [[FUNC_I:%.*]] = load ptr, ptr [[VTABLE_I]], align 8
+; CHECK-NEXT: invoke void [[FUNC_I]]()
+; CHECK-NEXT: to label %[[NEXT_I:.*]] unwind label %[[LPAD_I:.*]], !prof [[PROF16:![0-9]+]]
+; CHECK: [[NEXT_I]]:
+; CHECK-NEXT: invoke void @callee1(ptr [[FUNC_I]])
+; CHECK-NEXT: to label %[[CONT_I:.*]] unwind label %[[LPAD_I]], !prof [[PROF17:![0-9]+]]
+; CHECK: [[CONT_I]]:
+; CHECK-NEXT: invoke void @callee2(ptr [[FUNC_I]])
+; CHECK-NEXT: to label %[[CALLEE_EXIT:.*]] unwind label %[[LPAD_I]], !prof [[PROF18:![0-9]+]]
+;
+; CHECK-LABEL: define void @callee(
+; CHECK-SAME: ptr [[OBJ:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF19:![0-9]+]] {
+; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]], align 8, !prof [[PROF20:![0-9]+]]
+; CHECK-NEXT: [[FUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT: invoke void [[FUNC]]()
+; CHECK-NEXT: to label %[[NEXT:.*]] unwind label %[[LPAD:.*]], !prof [[PROF21:![0-9]+]]
+; CHECK: [[NEXT]]:
+; CHECK-NEXT: invoke void @callee1(ptr [[FUNC]])
+; CHECK-NEXT: to label %[[CONT:.*]] unwind label %[[LPAD]], !prof [[PROF22:![0-9]+]]
+; CHECK: [[CONT]]:
+; CHECK-NEXT: invoke void @callee2(ptr [[FUNC]])
+; CHECK-NEXT: to label %[[RET:.*]] unwind label %[[LPAD]], !prof [[PROF18]]
-; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400}
-; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1000}
-; CHECK: ![[PROF3]] = !{!"branch_weights", i32 1234, i32 5678}
-; CHECK: ![[PROF4]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200}
-; CHECK: ![[PROF5]] = !{!"branch_weights", i32 500}
+; CHECK: [[PROF14]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF15]] = !{!"VP", i32 2, i64 1000, i64 789, i64 600, i64 321, i64 400}
+; CHECK: [[PROF16]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400}
+; CHECK: [[PROF17]] = !{!"branch_weights", i32 1000}
+; CHECK: [[PROF18]] = !{!"branch_weights", i32 1234, i32 5678}
+; CHECK: [[PROF19]] = !{!"function_entry_count", i64 500}
+; CHECK: [[PROF20]] = !{!"VP", i32 2, i64 500, i64 789, i64 300, i64 321, i64 200}
+; CHECK: [[PROF21]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200}
+; CHECK: [[PROF22]] = !{!"branch_weights", i32 500}
diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
index daa95e93b68ec..96aa35fb572de 100644
--- a/llvm/test/Transforms/Inline/update_value_profile.ll
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -2,33 +2,33 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; When 'callee' is inlined into caller1 and caller2, the indirect call value
-; profiles of the inlined copy should be scaled based on callers' profiles,
-; and the indirect call value profiles in 'callee' should be updated.
-define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; When 'callee' is inlined into caller1 and caller2, the indirect call and vtable
+; value profiles of the inlined copy should be scaled based on callers' profiles.
+; The indirect call and vtable value profiles in 'callee' should be updated.
+define i32 @callee(ptr %0, i32 %1) !prof !19 {
; CHECK-LABEL: define i32 @callee(
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
-; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
- %3 = load ptr, ptr %0
+ %3 = load ptr, ptr %0, !prof !15
%5 = getelementptr inbounds i8, ptr %3, i64 8
%6 = load ptr, ptr %5
- %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+ %7 = tail call i32 %6(ptr %0, i32 %1), !prof !16
ret i32 %7
}
-define i32 @caller1(i32 %0) !prof !18 {
+define i32 @caller1(i32 %0) !prof !17 {
; CHECK-LABEL: define i32 @caller1(
-; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
@@ -36,14 +36,14 @@ define i32 @caller1(i32 %0) !prof !18 {
ret i32 %3
}
-define i32 @caller2(i32 %0) !prof !19 {
+define i32 @caller2(i32 %0) !prof !18 {
; CHECK-LABEL: define i32 @caller2(
-; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
-; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
@@ -67,15 +67,19 @@ declare ptr @_Z10createTypei(i32)
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
-!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600}
-!18 = !{!"function_entry_count", i64 1000}
-!19 = !{!"function_entry_count", i64 600}
-!20 = !{!"function_entry_count", i64 1700}
+!15 = !{!"VP", i32 2, i64 1600, i64 321, i64 1000, i64 789, i64 600}
+!16 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600}
+!17 = !{!"function_entry_count", i64 1000}
+!18 = !{!"function_entry_count", i64 600}
+!19 = !{!"function_entry_count", i64 1700}
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
-; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35}
-; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
-; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352}
-; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
-; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211}
+; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 321, i64 58, i64 789, i64 35}
+; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35}
+; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 321, i64 588, i64 789, i64 352}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352}
+; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 321, i64 352, i64 789, i64 211}
+; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211}
;.
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
new file mode 100644
index 0000000000000..c77be3b1ed244
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
+@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
+@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
+
+@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
+@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
+@Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
+
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1}
+
+define void @test(ptr %d) {
+; VTABLE-CMP-LABEL: define void @test(
+; VTABLE-CMP-SAME: ptr [[D:%.*]]) {
+; VTABLE-CMP-NEXT: [[ENTRY:.*:]]
+; VTABLE-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; VTABLE-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; VTABLE-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40)
+; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64)
+; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16)
+; VTABLE-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16)
+; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
+; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
+; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]]
+; VTABLE-CMP: [[IF_END_ICP6]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]]
+; VTABLE-CMP: [[IF_END_ICP3]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP]]
+; VTABLE-CMP: [[IF_END_ICP]]:
+; VTABLE-CMP-NEXT: ret void
+;
+; FUNC-CMP-LABEL: define void @test(
+; FUNC-CMP-SAME: ptr [[D:%.*]]) {
+; FUNC-CMP-NEXT: [[ENTRY:.*:]]
+; FUNC-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; FUNC-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; FUNC-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; FUNC-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; FUNC-CMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar
+; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]]
+; FUNC-CMP: [[IF_END_ICP6]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]]
+; FUNC-CMP: [[IF_END_ICP3]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP]]
+; FUNC-CMP: [[IF_END_ICP]]:
+; FUNC-CMP-NEXT: ret void
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !9
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ call void %1(ptr %d), !prof !10
+ ret void
+}
+
+define void @Base1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived2_bar(ptr %this) {
+ ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @Base2_foo(ptr)
+declare i32 @Base1_foo(ptr)
+declare void @Base3_foo(ptr)
+
+!0 = !{i64 16, !"Base1"}
+!1 = !{i64 40, !"Base1"}
+!2 = !{i64 16, !"Base2"}
+!3 = !{i64 16, !"Derived1"}
+!4 = !{i64 64, !"Base1"}
+!5 = !{i64 40, !"Base2"}
+!6 = !{i64 16, !"Base3"}
+!7 = !{i64 16, !"Derived2"}
+!8 = !{i64 16, !"Derived3"}
+!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100}
+!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400, i64 56781234, i64 100}
+;.
+; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100}
+; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000}
+; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500}
+; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100}
+
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
new file mode 100644
index 0000000000000..6d3a6972f6885
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -passes='pgo-icall-prom' -enable-vtable-profile-use -S | FileCheck %s --check-prefix=VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3
+
+@.str = private constant [15 x i8] c"out of tickets\00"
+
+define i32 @test(ptr %b) personality ptr @__gxx_personality_v0 {
+; VTABLE-LABEL: define i32 @test(
+; VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; VTABLE-NEXT: [[ENTRY:.*:]]
+; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
+; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16)
+; VTABLE-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF4:![0-9]+]]
+; VTABLE: [[IF_TRUE_DIRECT_TARG]]:
+; VTABLE-NEXT: [[TMP2:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]])
+; VTABLE-NEXT: to label %[[IF_END_ICP:.*]] unwind label %[[LPAD:.*]]
+; VTABLE: [[IF_FALSE_ORIG_INDIRECT]]:
+; VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16)
+; VTABLE-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF5:![0-9]+]]
+; VTABLE: [[IF_TRUE_DIRECT_TARG1]]:
+; VTABLE-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]])
+; VTABLE-NEXT: to label %[[IF_END_ICP3:.*]] unwind label %[[LPAD]]
+; VTABLE: [[IF_FALSE_ORIG_INDIRECT2]]:
+; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; VTABLE-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]])
+; VTABLE-NEXT: to label %[[IF_END_ICP3]] unwind label %[[LPAD]]
+; VTABLE: [[IF_END_ICP3]]:
+; VTABLE-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], %[[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], %[[IF_TRUE_DIRECT_TARG1]] ]
+; VTABLE-NEXT: br label %[[IF_END_ICP]]
+; VTABLE: [[IF_END_ICP]]:
+; VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], %[[IF_END_ICP3]] ], [ [[TMP2]], %[[IF_TRUE_DIRECT_TARG]] ]
+; VTABLE-NEXT: br label %[[NEXT:.*]]
+; VTABLE: [[NEXT]]:
+; VTABLE-NEXT: ret i32 [[TMP7]]
+; VTABLE: [[LPAD]]:
+; VTABLE-NEXT: [[EXN:%.*]] = landingpad { ptr, i32 }
+; VTABLE-NEXT: cleanup
+; VTABLE-NEXT: unreachable
+;
+entry:
+ %vtable = load ptr, ptr %b, !prof !4
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %1 = load ptr, ptr %vtable
+ %call = invoke i32 %1(ptr %b) to label %next unwind label %lpad, !prof !5
+
+next:
+ ret i32 %call
+
+lpad:
+ %exn = landingpad {ptr, i32}
+ cleanup
+ unreachable
+}
+
+declare void @make_error(ptr, ptr, i32)
+declare i32 @get_ticket_id()
+declare ptr @__cxa_allocate_exception(i64)
+
+define i32 @_ZN4Base10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 {
+entry:
+ %call = tail call i32 @get_ticket_id()
+ %cmp.not = icmp eq i32 %call, -1
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:
+ ret i32 %call
+
+if.end:
+ %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+ invoke void @make_error(ptr %exception, ptr @.str, i32 1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ unreachable
+
+lpad:
+ %0 = landingpad { ptr, i32 }
+ cleanup
+ resume { ptr, i32 } %0
+}
+
+define i32 @_ZN7Derived10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 {
+entry:
+ %call = tail call i32 @get_ticket_id()
+ %cmp.not = icmp eq i32 %call, -1
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:
+ ret i32 %call
+
+if.end:
+ %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+ invoke void @make_error(ptr %exception, ptr @.str, i32 2)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ unreachable
+
+lpad:
+ %0 = landingpad { ptr, i32 }
+ cleanup
+ resume { ptr, i32 } %0
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @__gxx_personality_v0(...)
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
+!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700}
+
+; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}
+;.
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
new file mode 100644
index 0000000000000..fb9ec0d0c85ff
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV7Derived}
+; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV4Base}
+
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, !type !0, !type !1, !type !2, !type !3
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, !type !0, !type !1
+
+define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) {
+; VTABLE-LABEL: define i32 @test_tail_call(
+; VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; VTABLE-NEXT: entry:
+; VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8
+; VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV7Derived, i32 16)
+; VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]]
+; VTABLE: if.true.direct_targ:
+; VTABLE-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; VTABLE-NEXT: ret i32 [[TMP3]]
+; VTABLE: 3:
+; VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV4Base, i32 16)
+; VTABLE-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]]
+; VTABLE: if.true.direct_targ1:
+; VTABLE-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; VTABLE-NEXT: ret i32 [[TMP6]]
+; VTABLE: 6:
+; VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; VTABLE-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]])
+; VTABLE-NEXT: ret i32 [[CALL]]
+;
+entry:
+ %vtable = load ptr, ptr %ptr, !prof !4
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %1 = load ptr, ptr %vtable
+ %call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5
+ ret i32 %call
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) {
+entry:
+ %sub = sub nsw i32 %a, %b
+ ret i32 %sub
+}
+
+define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) {
+entry:
+ %add = add nsw i32 %b, %a
+ ret i32 %add
+}
+
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
+!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700}
+
+; VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}
More information about the llvm-commits
mailing list