[compiler-rt] [llvm] [TypeProf][IndirectCallPromotion]Implement vtable-based transformation (PR #81442)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 22:30:34 PST 2024
https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/81442
* Cost-benefit analysis will be added in a subsequent patch.
* The parent patch is https://github.com/llvm/llvm-project/pull/81378
>From 48adcf1a142de6abeeb16596c5087fe83e8f422b Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 7 Feb 2024 15:12:36 -0800
Subject: [PATCH 1/5] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
compiler-rt/include/profile/InstrProfData.inc | 58 +-
compiler-rt/lib/profile/InstrProfiling.h | 35 +-
.../lib/profile/InstrProfilingBuffer.c | 58 +-
.../lib/profile/InstrProfilingInternal.h | 4 +-
compiler-rt/lib/profile/InstrProfilingMerge.c | 25 +-
.../lib/profile/InstrProfilingPlatformLinux.c | 20 +
.../lib/profile/InstrProfilingWriter.c | 37 +-
.../llvm/Analysis/IndirectCallVisitor.h | 70 +-
llvm/include/llvm/ProfileData/InstrProf.h | 170 ++++-
.../llvm/ProfileData/InstrProfData.inc | 40 +-
.../llvm/ProfileData/InstrProfReader.h | 20 +
.../llvm/ProfileData/InstrProfWriter.h | 4 +
.../IndirectCallPromotionAnalysis.cpp | 4 +
llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 20 +
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 13 +-
llvm/lib/ProfileData/InstrProf.cpp | 172 ++++-
llvm/lib/ProfileData/InstrProfReader.cpp | 72 ++-
llvm/lib/ProfileData/InstrProfWriter.cpp | 59 +-
.../Instrumentation/IndirectCallPromotion.cpp | 45 +-
.../Instrumentation/InstrProfiling.cpp | 173 +++++
.../Instrumentation/PGOInstrumentation.cpp | 7 +
.../Instrumentation/ValueProfilePlugins.inc | 36 +-
.../thinlto-func-summary-vtableref-pgo.ll | 74 +++
.../InstrProfiling/coverage.ll | 8 +-
.../thinlto_indirect_call_promotion.profraw | Bin 528 -> 544 bytes
.../PGOProfile/Inputs/vtable_prof.profraw | Bin 0 -> 656 bytes
.../Transforms/PGOProfile/comdat_internal.ll | 4 +-
.../Transforms/PGOProfile/vtable_profile.ll | 98 +++
.../llvm-profdata/Inputs/c-general.profraw | Bin 2016 -> 2032 bytes
.../llvm-profdata/Inputs/compressed.profraw | Bin 1968 -> 1984 bytes
.../Inputs/update_vtable_value_prof_inputs.sh | 102 +++
.../Inputs/vtable-value-prof-basic.profraw | Bin 0 -> 960 bytes
.../Inputs/vtable-value-prof.proftext | 73 +++
.../llvm-profdata/binary-ids-padding.test | 6 +-
.../llvm-profdata/large-binary-id-size.test | 4 +-
...alformed-not-space-for-another-header.test | 6 +-
.../malformed-num-counters-zero.test | 6 +-
.../malformed-ptr-to-counter-array.test | 6 +-
.../misaligned-binary-ids-size.test | 4 +-
.../mismatched-raw-profile-header.test | 2 +
.../tools/llvm-profdata/raw-32-bits-be.test | 11 +-
.../tools/llvm-profdata/raw-32-bits-le.test | 10 +-
.../tools/llvm-profdata/raw-64-bits-be.test | 10 +-
.../tools/llvm-profdata/raw-64-bits-le.test | 10 +-
.../tools/llvm-profdata/raw-two-profiles.test | 8 +-
.../vtable-value-prof-basic.test | 124 ++++
.../llvm-profdata/vtable-value-prof.proftext | 16 +
llvm/tools/llvm-profdata/llvm-profdata.cpp | 30 +-
llvm/unittests/ProfileData/InstrProfTest.cpp | 604 ++++++++++++++----
49 files changed, 2065 insertions(+), 293 deletions(-)
create mode 100644 llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw
create mode 100644 llvm/test/Transforms/PGOProfile/vtable_profile.ll
create mode 100755 llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw
create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof.proftext
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 25df899b3f3619..f0bc2d960ce688 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -94,6 +94,26 @@ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \
#undef INSTR_PROF_DATA
/* INSTR_PROF_DATA end. */
+/* For a virtual table object, record the name hash to associate profiled
+ * addresses with global variables, and record {starting address, size in bytes}
+ * to map the profiled virtual table (which usually has an offset from the
+ * starting address) back to a virtual table object. */
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(
+ const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash,
+ ConstantInt::get(llvm::Type::getInt64Ty(Ctx),
+ IndexedInstrProf::ComputeHash(PGOVTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx),
+ VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize,
+ ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
+ VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
/* This is an internal data structure used by value profiler. It
* is defined here to allow serialization code sharing by LLVM
@@ -145,6 +165,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta,
(uintptr_t)BitmapBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -186,13 +208,28 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
/* For memory intrinsic functions size profiling. */
VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+/* For virtual table address profiling, the addresses of the virtual table
+ * (i.e., the address contained in objects pointing to a virtual table) are
+ * profiled. Note this may not be the address of the per C++ class virtual table
+ * object (i.e., there is an offset).
+ *
+ * The profiled addresses are stored in raw profile, together with the following
+ * two types of information.
+ * 1. The (beginning and ending) addresses of per C++ class virtual table objects.
+ * 2. The (compressed) virtual table object names.
+ * RawInstrProfReader converts profiled virtual table addresses to virtual table
+ * objects' MD5 hash.
+ */
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "The address of the compatible vtable (i.e., "
+ "there is an offset from this address to per C++ "
+ "class virtual table global variable.)")
/* These two kinds must be the last to be
* declared. This is to make sure the string
* array created with the template can be
* indexed with the kind value.
*/
VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
#undef VALUE_PROF_KIND
/* VALUE_PROF_KIND end */
@@ -267,9 +304,9 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
#undef COVMAP_HEADER
/* COVMAP_HEADER end. */
-
#ifdef INSTR_PROF_SECT_ENTRY
#define INSTR_PROF_DATA_DEFINED
+
INSTR_PROF_SECT_ENTRY(IPSK_data, \
INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \
INSTR_PROF_DATA_COFF, "__DATA,")
@@ -282,12 +319,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \
INSTR_PROF_SECT_ENTRY(IPSK_name, \
INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+ INSTR_PROF_VNAME_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vals, \
INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
INSTR_PROF_VALS_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+ INSTR_PROF_VTAB_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -307,7 +350,6 @@ INSTR_PROF_SECT_ENTRY(IPSK_covname, \
#undef INSTR_PROF_SECT_ENTRY
#endif
-
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED
@@ -479,7 +521,6 @@ getValueProfRecordHeaderSize(uint32_t NumValueSites);
#undef INSTR_PROF_VALUE_PROF_DATA
#endif /* INSTR_PROF_VALUE_PROF_DATA */
-
#ifdef INSTR_PROF_COMMON_API_IMPL
#define INSTR_PROF_DATA_DEFINED
#ifdef __cplusplus
@@ -663,9 +704,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 9
+#define INSTR_PROF_RAW_VERSION 10
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 11
+#define INSTR_PROF_INDEX_VERSION 12
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 6
@@ -703,10 +744,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
than WIN32 */
#define INSTR_PROF_DATA_COMMON __llvm_prf_data
#define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
#define INSTR_PROF_BITS_COMMON __llvm_prf_bits
#define INSTR_PROF_VALS_COMMON __llvm_prf_vals
#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
#define INSTR_PROF_COVMAP_COMMON __llvm_covmap
#define INSTR_PROF_COVFUN_COMMON __llvm_covfun
#define INSTR_PROF_COVDATA_COMMON __llvm_covdata
@@ -717,10 +760,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
*/
#define INSTR_PROF_DATA_COFF ".lprfd$M"
#define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfn$M"
#define INSTR_PROF_CNTS_COFF ".lprfc$M"
#define INSTR_PROF_BITS_COFF ".lprfb$M"
#define INSTR_PROF_VALS_COFF ".lprfv$M"
#define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
#define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
#define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
/* Since cov data and cov names sections are not allocated, we don't need to
@@ -938,3 +983,4 @@ InstrProfIsSingleValRange(uint64_t Value) {
}
#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */
+
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 01239083369187..9e6306ace61f2c 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -49,6 +49,12 @@ typedef struct ValueProfNode {
#include "profile/InstrProfData.inc"
} ValueProfNode;
+typedef void *IntPtrT;
+typedef struct VTableProfData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} VTableProfData;
+
/*!
* \brief Return 1 if profile counters are continuously synced to the raw
* profile via an mmap(). This is in contrast to the default mode, in which
@@ -103,12 +109,16 @@ const __llvm_profile_data *__llvm_profile_begin_data(void);
const __llvm_profile_data *__llvm_profile_end_data(void);
const char *__llvm_profile_begin_names(void);
const char *__llvm_profile_end_names(void);
+const char *__llvm_profile_begin_vtabnames(void);
+const char *__llvm_profile_end_vtabnames(void);
char *__llvm_profile_begin_counters(void);
char *__llvm_profile_end_counters(void);
char *__llvm_profile_begin_bitmap(void);
char *__llvm_profile_end_bitmap(void);
ValueProfNode *__llvm_profile_begin_vnodes();
ValueProfNode *__llvm_profile_end_vnodes();
+VTableProfData *__llvm_profile_begin_vtables();
+VTableProfData *__llvm_profile_end_vtables();
uint32_t *__llvm_profile_begin_orderfile();
/*!
@@ -252,20 +262,31 @@ uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin,
/*! \brief Get the size of the profile name section in bytes. */
uint64_t __llvm_profile_get_name_size(const char *Begin, const char *End);
-/* ! \brief Given the sizes of the data and counter information, return the
- * number of padding bytes before and after the counters, and after the names,
- * in the raw profile.
+/*! \brief Get the number of virtual table profile data entries */
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+ const VTableProfData *End);
+
+/*! \brief Get the size of virtual table profile data in bytes. */
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+ const VTableProfData *End);
+
+/*! \brief Given the sizes of the data and counter information, computes the
+ * number of padding bytes before and after the counter section, as well as the
+ * number of padding bytes after other sections in the raw profile.
+ * Returns -1 upon errors and 0 upon success. Output parameters should be used
+ * iff return value is 0.
*
* Note: When mmap() mode is disabled, no padding bytes before/after counters
* are needed. However, in mmap() mode, the counter section in the raw profile
* must be page-aligned: this API computes the number of padding bytes
* needed to achieve that.
*/
-void __llvm_profile_get_padding_sizes_for_counters(
+int __llvm_profile_get_padding_sizes_for_counters(
uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes,
- uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters,
- uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmap,
- uint64_t *PaddingBytesAfterNames);
+ uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize,
+ uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
+ uint64_t *PaddingBytesAfterBitmap, uint64_t *PaddingBytesAfterNames,
+ uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVNames);
/*!
* \brief Set the flag that profile data has been dumped to the file.
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index af52804b2b532c..f31dc7d4e2111a 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -70,6 +70,18 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
const __llvm_profile_data *End) {
return __llvm_profile_get_num_data(Begin, End) * sizeof(__llvm_profile_data);
}
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+ const VTableProfData *End) {
+ intptr_t EndI = (intptr_t)End, BeginI = (intptr_t)Begin;
+ return (EndI + sizeof(VTableProfData) - 1 - BeginI) / sizeof(VTableProfData);
+}
+
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+ const VTableProfData *End) {
+ return __llvm_profile_get_num_vtable(Begin, End) * sizeof(VTableProfData);
+}
COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) {
if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE)
@@ -119,11 +131,13 @@ static int needsCounterPadding(void) {
}
COMPILER_RT_VISIBILITY
-void __llvm_profile_get_padding_sizes_for_counters(
+int __llvm_profile_get_padding_sizes_for_counters(
uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes,
- uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters,
- uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmapBytes,
- uint64_t *PaddingBytesAfterNames) {
+ uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize,
+ uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
+ uint64_t *PaddingBytesAfterBitmapBytes, uint64_t *PaddingBytesAfterNames,
+ uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVName) {
+ // Counter padding is needed only if continuous mode is enabled.
if (!needsCounterPadding()) {
*PaddingBytesBeforeCounters = 0;
*PaddingBytesAfterCounters =
@@ -131,9 +145,19 @@ void __llvm_profile_get_padding_sizes_for_counters(
*PaddingBytesAfterBitmapBytes =
__llvm_profile_get_num_padding_bytes(NumBitmapBytes);
*PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize);
- return;
+ if (PaddingBytesAfterVTable != NULL)
+ *PaddingBytesAfterVTable =
+ __llvm_profile_get_num_padding_bytes(VTableSize);
+ if (PaddingBytesAfterVName != NULL)
+ *PaddingBytesAfterVName = __llvm_profile_get_num_padding_bytes(VNameSize);
+ return 0;
}
+ // Value profiling is not supported in continuous mode at profile-write
+ // time. Return -1 to signal the incompatibility.
+ if (VTableSize != 0 || VNameSize != 0)
+ return -1;
+
// In continuous mode, the file offsets for headers and for the start of
// counter sections need to be page-aligned.
*PaddingBytesBeforeCounters =
@@ -142,6 +166,13 @@ void __llvm_profile_get_padding_sizes_for_counters(
*PaddingBytesAfterBitmapBytes =
calculateBytesNeededToPageAlign(NumBitmapBytes);
*PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize);
+ // Set these two variables to zero to avoid uninitialized variables
+ // even if VTableSize and VNameSize are known to be zero.
+ if (PaddingBytesAfterVTable != NULL)
+ *PaddingBytesAfterVTable = 0;
+ if (PaddingBytesAfterVName != NULL)
+ *PaddingBytesAfterVName = 0;
+ return 0;
}
COMPILER_RT_VISIBILITY
@@ -162,9 +193,11 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes;
__llvm_profile_get_padding_sizes_for_counters(
- DataSize, CountersSize, NumBitmapBytes, NamesSize,
- &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
- &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames);
+ DataSize, CountersSize, NumBitmapBytes, NamesSize, 0 /* VTableSize */,
+ 0 /* VNameSize */, &PaddingBytesBeforeCounters,
+ &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes,
+ &PaddingBytesAfterNames, NULL /* PaddingBytesAfterVTable */,
+ NULL /* PaddingBytesAfterVNames */);
return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
DataSize + PaddingBytesBeforeCounters + CountersSize +
@@ -191,7 +224,10 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
const char *NamesBegin, const char *NamesEnd) {
ProfDataWriter BufferWriter;
initBufferWriter(&BufferWriter, Buffer);
- return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin,
- CountersEnd, BitmapBegin, BitmapEnd, 0, NamesBegin,
- NamesEnd, 0);
+ // Set virtual table arguments to NULL since they are not supported yet.
+ return lprofWriteDataImpl(
+ &BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
+ BitmapBegin, BitmapEnd, 0 /* VPDataReader */, NamesBegin, NamesEnd,
+ NULL /* VTableBegin */, NULL /* VTableEnd */, NULL /* VNamesBegin */,
+ NULL /* VNamesEnd */, 0 /* SkipNameDataWrite */);
}
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index 03ed67fcfa766f..38159b668a1dfd 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -156,7 +156,9 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
const char *CountersBegin, const char *CountersEnd,
const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
- const char *NamesEnd, int SkipNameDataWrite);
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd, int SkipNameDataWrite);
/* Merge value profile data pointed to by SrcValueProfData into
* in-memory profile counters pointed by to DstData. */
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index b5850e99ee37d8..ad7a50dc77f44e 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -107,6 +107,27 @@ static uintptr_t signextIfWin64(void *V) {
#endif
}
+static uint64_t
+getDistanceFromCounterToValueProf(const __llvm_profile_header *const Header) {
+ // Skip names section, vtable profile data section and vtable names section
+ // for runtime profile merge. To merge runtime addresses from multiple
+ // profiles collected from the same instrumented binary, the binary should be
+ // loaded at fixed base address (e.g., build with -no-pie, or run with ASLR
+ // disabled).
+ // In this set-up these three sections remain unchanged.
+ const uint64_t VTableSectionSize =
+ Header->NumVTables * sizeof(VTableProfData);
+ const uint64_t PaddingBytesAfterVTableSection =
+ __llvm_profile_get_num_padding_bytes(VTableSectionSize);
+ const uint64_t VNamesSize = Header->VNamesSize;
+ const uint64_t PaddingBytesAfterVNamesSize =
+ __llvm_profile_get_num_padding_bytes(VNamesSize);
+ return Header->NamesSize +
+ __llvm_profile_get_num_padding_bytes(Header->NamesSize) +
+ VTableSectionSize + PaddingBytesAfterVTableSection + VNamesSize +
+ PaddingBytesAfterVNamesSize;
+}
+
COMPILER_RT_VISIBILITY
int __llvm_profile_merge_from_buffer(const char *ProfileData,
uint64_t ProfileSize) {
@@ -136,9 +157,9 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
Header->NumCounters * __llvm_profile_counter_entry_size();
SrcBitmapStart = SrcCountersEnd;
SrcNameStart = SrcBitmapStart + Header->NumBitmapBytes;
+
SrcValueProfDataStart =
- SrcNameStart + Header->NamesSize +
- __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+ SrcNameStart + getDistanceFromCounterToValueProf(Header);
if (SrcNameStart < SrcCountersStart || SrcNameStart < SrcBitmapStart)
return 1;
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 19266ab6c6fb8a..d2554a2702aaf6 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -24,8 +24,12 @@
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
+#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON)
+#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON)
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
+#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON)
+#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON)
#define PROF_BITS_START INSTR_PROF_SECT_START(INSTR_PROF_BITS_COMMON)
#define PROF_BITS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_BITS_COMMON)
#define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
@@ -41,6 +45,10 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_BITS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_BITS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
@@ -63,6 +71,18 @@ COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
return &PROF_NAME_STOP;
}
+COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vtabnames(void) {
+ return &PROF_VNAME_START;
+}
+COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vtabnames(void) {
+ return &PROF_VNAME_STOP;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_begin_vtables(void) {
+ return &PROF_VTABLE_START;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_end_vtables(void) {
+ return &PROF_VTABLE_STOP;
+}
COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) {
return &PROF_CNTS_START;
}
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 4d767d13851485..8816a71155511b 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -250,9 +250,14 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
const char *BitmapEnd = __llvm_profile_end_bitmap();
const char *NamesBegin = __llvm_profile_begin_names();
const char *NamesEnd = __llvm_profile_end_names();
+ const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
+ const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
+ const char *VNamesBegin = __llvm_profile_begin_vtabnames();
+ const char *VNamesEnd = __llvm_profile_end_vtabnames();
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
- NamesBegin, NamesEnd, SkipNameDataWrite);
+ NamesBegin, NamesEnd, VTableBegin, VTableEnd,
+ VNamesBegin, VNamesEnd, SkipNameDataWrite);
}
COMPILER_RT_VISIBILITY int
@@ -261,7 +266,9 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const char *CountersBegin, const char *CountersEnd,
const char *BitmapBegin, const char *BitmapEnd,
VPDataReaderType *VPDataReader, const char *NamesBegin,
- const char *NamesEnd, int SkipNameDataWrite) {
+ const char *NamesEnd, const VTableProfData *VTableBegin,
+ const VTableProfData *VTableEnd, const char *VNamesBegin,
+ const char *VNamesEnd, int SkipNameDataWrite) {
/* Calculate size of sections. */
const uint64_t DataSectionSize =
__llvm_profile_get_data_size(DataBegin, DataEnd);
@@ -273,6 +280,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
const uint64_t NumBitmapBytes =
__llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd);
const uint64_t NamesSize = __llvm_profile_get_name_size(NamesBegin, NamesEnd);
+ const uint64_t NumVTables =
+ __llvm_profile_get_num_vtable(VTableBegin, VTableEnd);
+ const uint64_t VTableSectionSize =
+ __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd);
+ const uint64_t VNamesSize =
+ __llvm_profile_get_name_size(VNamesBegin, VNamesEnd);
/* Create the header. */
__llvm_profile_header Header;
@@ -280,11 +293,15 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
/* Determine how much padding is needed before/after the counters and after
* the names. */
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
- PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes;
- __llvm_profile_get_padding_sizes_for_counters(
- DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize,
- &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
- &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames);
+ PaddingBytesAfterBitmapBytes, PaddingBytesAfterNames,
+ PaddingBytesAfterVTable, PaddingBytesAfterVNames;
+ if (__llvm_profile_get_padding_sizes_for_counters(
+ DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize,
+ VTableSectionSize, VNamesSize, &PaddingBytesBeforeCounters,
+ &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes,
+ &PaddingBytesAfterNames, &PaddingBytesAfterVTable,
+ &PaddingBytesAfterVNames) == -1)
+ return -1;
{
/* Initialize header structure. */
@@ -323,7 +340,11 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
{BitmapBegin, sizeof(uint8_t), NumBitmapBytes, 0},
{NULL, sizeof(uint8_t), PaddingBytesAfterBitmapBytes, 1},
{SkipNameDataWrite ? NULL : NamesBegin, sizeof(uint8_t), NamesSize, 0},
- {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}};
+ {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1},
+ {VTableBegin, sizeof(uint8_t), VTableSectionSize, 0},
+ {NULL, sizeof(uint8_t), PaddingBytesAfterVTable, 1},
+ {SkipNameDataWrite ? NULL : VNamesBegin, sizeof(uint8_t), VNamesSize, 0},
+ {NULL, sizeof(uint8_t), PaddingBytesAfterVNames, 1}};
if (Writer->Write(Writer, IOVecData, sizeof(IOVecData) / sizeof(*IOVecData)))
return -1;
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 0825e19ecd2d24..5969241a179ea1 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -12,27 +12,87 @@
#ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
#define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/InstVisitor.h"
#include <vector>
namespace llvm {
-// Visitor class that finds all indirect call.
+// Visitor class that finds indirect calls or instructions that give vtable
+// values, depending on Type.
struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
+ enum class InstructionType {
+ kIndirectCall = 0,
+ kVTableVal = 1,
+ };
std::vector<CallBase *> IndirectCalls;
- PGOIndirectCallVisitor() = default;
+ std::vector<Instruction *> ProfiledAddresses;
+ PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {}
+
+ // Given an indirect call instruction, try to find the following pattern
+ //
+ // %vtable = load ptr, ptr %obj
+ // %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ // %2 = load ptr, ptr %vfn
+ // %call = tail call i32 %2
+ //
+ // A heuristic is used to find the address feeding instructions.
+ static Instruction *tryGetVTableInstruction(CallBase *CB) {
+ assert(CB != nullptr && "Caller guaranteed");
+ LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
+
+ if (LI != nullptr) {
+ Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast)
+ Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets();
+ // FIXME: Add support in the frontend so LLVM type intrinsics are
+ // emitted without LTO. This way, added intrinsics could filter
+ // non-vtable instructions and reduce instrumentation overhead.
+ // Since a non-vtable profiled address is not within the address
+ // range of vtable objects, it's stored as zero in indexed profiles.
+ // A pass that looks up a symbol with a zero hash will (almost) always
+ // find nullptr and skip the actual transformation (e.g., comparison
+ // of symbols). So the performance overhead from a non-vtable profiled
+ // address is negligible, if it exists at all. Comparing the loaded
+ // address with the symbol address guarantees correctness.
+ if (VTablePtr != nullptr && isa<Instruction>(VTablePtr)) {
+ return cast<Instruction>(VTablePtr);
+ }
+ }
+ return nullptr;
+ }
void visitCallBase(CallBase &Call) {
- if (Call.isIndirectCall())
+ if (Call.isIndirectCall()) {
IndirectCalls.push_back(&Call);
+
+ if (Type != InstructionType::kVTableVal)
+ return;
+
+ Instruction *VPtr =
+ PGOIndirectCallVisitor::tryGetVTableInstruction(&Call);
+ if (VPtr) {
+ ProfiledAddresses.push_back(VPtr);
+ }
+ }
}
+
+private:
+ InstructionType Type;
};
-// Helper function that finds all indirect call sites.
inline std::vector<CallBase *> findIndirectCalls(Function &F) {
- PGOIndirectCallVisitor ICV;
+ PGOIndirectCallVisitor ICV(
+ PGOIndirectCallVisitor::InstructionType::kIndirectCall);
ICV.visit(F);
return ICV.IndirectCalls;
}
+
+inline std::vector<Instruction *> findVTableAddrs(Function &F) {
+ PGOIndirectCallVisitor ICV(
+ PGOIndirectCallVisitor::InstructionType::kVTableVal);
+ ICV.visit(F);
+ return ICV.ProfiledAddresses;
+}
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 87e7bbbd727ee5..6cdceae5eeb960 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -89,6 +89,9 @@ inline StringRef getInstrProfValueProfMemOpFuncName() {
/// Return the name prefix of variables containing instrumented function names.
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
+/// Return the name prefix of variables containing virtual table profile data.
+inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; }
+
/// Return the name prefix of variables containing per-function control data.
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
@@ -110,6 +113,8 @@ inline StringRef getInstrProfNamesVarName() {
return "__llvm_prf_nm";
}
+inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; }
+
/// Return the name of a covarage mapping variable (internal linkage)
/// for each instrumented source module. Such variables are allocated
/// in the __llvm_covmap section.
@@ -246,6 +251,9 @@ Error collectGlobalObjectNameStrings(ArrayRef<std::string> NameStrs,
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
std::string &Result, bool doCompression = true);
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+ std::string &Result, bool doCompression);
+
/// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
/// set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);
@@ -269,13 +277,15 @@ void annotateValueSite(Module &M, Instruction &Inst,
uint32_t MaxMDCount = 3);
/// Same as the above interface but using an ArrayRef, as well as \p Sum.
+/// This function will not annotate !prof metadata on the instruction if the
+/// referenced array is empty.
void annotateValueSite(Module &M, Instruction &Inst,
ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
InstrProfValueKind ValueKind, uint32_t MaxMDCount);
/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
-/// otherwise return true.
+/// otherwise return true.
bool getValueProfDataFromInst(const Instruction &Inst,
InstrProfValueKind ValueKind,
uint32_t MaxNumValueData,
@@ -283,11 +293,23 @@ bool getValueProfDataFromInst(const Instruction &Inst,
uint32_t &ActualNumValueData, uint64_t &TotalC,
bool GetNoICPValue = false);
+/// Extracts the value profile data from \p Inst and returns it if \p Inst is
+/// annotated with value profile data. Returns nullptr otherwise. It's similar
+/// to `getValueProfDataFromInst` above except that an array is allocated only
+/// after a preliminary check that the value profiles of kind `ValueKind`
+/// exist.
+std::unique_ptr<InstrProfValueData[]>
+getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
+ uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
+ uint64_t &TotalC, bool GetNoICPValue = false);
+
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);
+std::string getPGOName(const GlobalVariable &V, bool InLTO = false);
+
/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
@@ -295,7 +317,7 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
-bool needsComdatForCounter(const Function &F, const Module &M);
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M);
/// An enum describing the attributes of an instrumented profile.
enum class InstrProfKind {
@@ -429,20 +451,36 @@ uint64_t ComputeHash(StringRef K);
class InstrProfSymtab {
public:
using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
+ using RangeHashMap =
+ std::vector<std::pair<std::pair<uint64_t, uint64_t>, uint64_t>>;
private:
StringRef Data;
uint64_t Address = 0;
- // Unique name strings.
+ // Unique name strings. Used to ensure entries in MD5NameMap (a vector that's
+ // going to be sorted) have unique MD5 keys in the first place.
StringSet<> NameTab;
+ // Records the unique virtual table names. This is used by InstrProfWriter to
+ // write out an on-disk chained hash table of virtual table names.
+ // InstrProfWriter stores per function profile data (keyed by function names)
+ // so it doesn't use a StringSet for function names.
+ StringSet<> VTableNames;
// A map from MD5 keys to function name strings.
std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
+ // A map from MD5 keys to virtual table definitions. Only populated when
+ // building the Symtab from a module.
+ std::vector<std::pair<uint64_t, GlobalVariable *>> MD5VTableMap;
// A map from MD5 keys to function define. We only populate this map
// when build the Symtab from a Module.
std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
// A map from function runtime address to function name MD5 hash.
// This map is only populated and used by raw instr profile reader.
AddrHashMap AddrToMD5Map;
+ // A map from virtual table runtime address range to vtable name MD5 hash.
+ // This map is only populated and used by raw instr profile reader.
+ // This is a different map from 'AddrToMD5Map' for readability and
+ // debuggability.
+ RangeHashMap VTableAddrRangeToMD5Map;
bool Sorted = false;
static StringRef getExternalSymbol() {
@@ -470,9 +508,19 @@ class InstrProfSymtab {
/// \c NameStrings is a string composed of one of more sub-strings
/// encoded in the format described in \c collectPGOFuncNameStrings.
- /// This method is a wrapper to \c readPGOFuncNameStrings method.
+ /// This method is a wrapper to \c readAndDecodeStrings method.
Error create(StringRef NameStrings);
+ /// \c FuncNameStrings is a string composed of one or more encoded function
+ /// name strings, and \c VTableNameStrings consists of one or more encoded
+ /// vtable names. This function is a wrapper to \c readAndDecodeStrings
+ /// method.
+ Error create(StringRef FuncNameStrings, StringRef VTableNameStrings);
+
+ /// Initialize 'this' with the set of vtable names encoded in
+ /// \c CompressedVTableNames.
+ Error initVTableNamesFromCompressedStrings(StringRef CompressedVTableNames);
+
/// This interface is used by reader of CoverageMapping test
/// format.
inline Error create(StringRef D, uint64_t BaseAddr);
@@ -485,32 +533,70 @@ class InstrProfSymtab {
/// Create InstrProfSymtab from a set of names iteratable from
/// \p IterRange. This interface is used by IndexedProfReader.
- template <typename NameIterRange> Error create(const NameIterRange &IterRange);
-
- /// Update the symtab by adding \p FuncName to the table. This interface
- /// is used by the raw and text profile readers.
- Error addFuncName(StringRef FuncName) {
- if (FuncName.empty())
+ template <typename NameIterRange>
+ Error create(const NameIterRange &IterRange);
+
+ /// Create InstrProfSymtab from a set of function names and vtable
+ /// names iterable from \p FuncIterRange and \p VTableIterRange
+ /// respectively. This interface is used by IndexedProfReader.
+ template <typename FuncNameIterRange, typename VTableNameIterRange>
+ Error create(const FuncNameIterRange &FuncIterRange,
+ const VTableNameIterRange &VTableIterRange);
+
+ Error addSymbolName(StringRef SymbolName) {
+ if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
- "function name is empty");
- auto Ins = NameTab.insert(FuncName);
+ "symbol name is empty");
+
+ // Insert into NameTab so that MD5NameMap (a vector that will be sorted)
+ // won't have duplicated entries in the first place.
+ auto Ins = NameTab.insert(SymbolName);
if (Ins.second) {
MD5NameMap.push_back(std::make_pair(
- IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
+ IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey()));
Sorted = false;
}
return Error::success();
}
+ /// The method name is kept since there are many callers.
+ /// It just forwards to 'addSymbolName'.
+ Error addFuncName(StringRef FuncName) { return addSymbolName(FuncName); }
+
+ /// Adds VTableName as a known symbol, and inserts it into a set that
+ /// tracks all vtable names.
+ Error addVTableName(StringRef VTableName) {
+ if (Error E = addSymbolName(VTableName))
+ return E;
+
+ // Record VTableName. InstrProfWriter uses this set. The comment on the
+ // class member explains why.
+ VTableNames.insert(VTableName);
+ return Error::success();
+ }
+
+ const StringSet<> &getVTableNames() const { return VTableNames; }
+
/// Map a function address to its name's MD5 hash. This interface
/// is only used by the raw profiler reader.
void mapAddress(uint64_t Addr, uint64_t MD5Val) {
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
+ /// Map the address range (i.e., [start_address, end_address)) of a variable
+ /// to its name's MD5 hash. This interface is only used by the raw profile
+ /// reader.
+ void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) {
+ VTableAddrRangeToMD5Map.push_back(
+ std::make_pair(std::make_pair(StartAddr, EndAddr), MD5Val));
+ }
+
/// Return a function's hash, or 0, if the function isn't in this SymTab.
uint64_t getFunctionHashFromAddress(uint64_t Address);
+ /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab.
+ uint64_t getVTableHashFromAddress(uint64_t Address);
+
/// Return function's PGO name from the function name's symbol
/// address in the object file. If an error occurs, return
/// an empty string.
@@ -532,6 +618,8 @@ class InstrProfSymtab {
/// Return function from the name's md5 hash. Return nullptr if not found.
inline Function *getFunction(uint64_t FuncMD5Hash);
+ /// Return vtable from the name's MD5 hash. Return nullptr if not found.
+ inline GlobalVariable *getGlobalVariable(uint64_t GlobalVariableMD5Hash);
/// Return the name section data.
inline StringRef getNameData() const { return Data; }
@@ -556,6 +644,23 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) {
return Error::success();
}
+template <typename FuncNameIterRange, typename VTableNameIterRange>
+Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange,
+ const VTableNameIterRange &VTableIterRange) {
+ for (auto Name : FuncIterRange)
+ if (Error E = addFuncName(Name))
+ return E;
+
+ for (auto VTableName : VTableIterRange) {
+ if (Error E = addVTableName(VTableName)) {
+ return E;
+ }
+ }
+
+ finalizeSymtab();
+ return Error::success();
+}
+
void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
@@ -564,6 +669,16 @@ void InstrProfSymtab::finalizeSymtab() {
llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
AddrToMD5Map.end());
+ // MD5VTableMap is binary-searched by getGlobalVariable(), so it must be
+ // sorted by MD5 key before any lookup.
+ llvm::sort(MD5VTableMap, less_first());
+ // VTable object address ranges should not overlap, so sorting by either
+ // the beginning address or the end address is fine.
+ llvm::sort(VTableAddrRangeToMD5Map, less_first());
+ // std::unique compares entries with operator== of std::pair, so only exact
+ // duplicates (same address range and same hash) are removed.
+ VTableAddrRangeToMD5Map.erase(std::unique(VTableAddrRangeToMD5Map.begin(),
+ VTableAddrRangeToMD5Map.end()),
+ VTableAddrRangeToMD5Map.end());
Sorted = true;
}
@@ -594,6 +706,19 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
return nullptr;
}
+GlobalVariable *
+InstrProfSymtab::getGlobalVariable(uint64_t GlobalVariableMD5Hash) {
+ finalizeSymtab();
+ auto Result =
+ llvm::lower_bound(MD5VTableMap, GlobalVariableMD5Hash,
+ [](const std::pair<uint64_t, GlobalVariable *> &LHS,
+ uint64_t RHS) { return LHS.first < RHS; });
+
+ if (Result != MD5VTableMap.end() && Result->first == GlobalVariableMD5Hash)
+ return Result->second;
+ return nullptr;
+}
+
// To store the sums of profile count values, or the percentage of
// the sums of the total count values.
struct CountSumOrPercent {
@@ -820,6 +945,7 @@ struct InstrProfRecord {
struct ValueProfData {
std::vector<InstrProfValueSiteRecord> IndirectCallSites;
std::vector<InstrProfValueSiteRecord> MemOPSizes;
+ std::vector<InstrProfValueSiteRecord> VTableTargets;
};
std::unique_ptr<ValueProfData> ValueData;
@@ -842,6 +968,8 @@ struct InstrProfRecord {
return ValueData->IndirectCallSites;
case IPVK_MemOPSize:
return ValueData->MemOPSizes;
+ case IPVK_VTableTarget:
+ return ValueData->VTableTargets;
default:
llvm_unreachable("Unknown value kind!");
}
@@ -856,6 +984,8 @@ struct InstrProfRecord {
return ValueData->IndirectCallSites;
case IPVK_MemOPSize:
return ValueData->MemOPSizes;
+ case IPVK_VTableTarget:
+ return ValueData->VTableTargets;
default:
llvm_unreachable("Unknown value kind!");
}
@@ -1025,7 +1155,9 @@ enum ProfVersion {
Version10 = 10,
// An additional field is used for bitmap bytes.
Version11 = 11,
- // The current version is 11.
+ // VTable profiling.
+ Version12 = 12,
+ // The current version is 12.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1046,6 +1178,7 @@ struct Header {
uint64_t MemProfOffset;
uint64_t BinaryIdOffset;
uint64_t TemporalProfTracesOffset;
+ uint64_t VTableNamesOffset; // Organize virtual table names.
// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
// the new field is read correctly.
@@ -1182,8 +1315,13 @@ template <> inline uint64_t getMagic<uint32_t>() {
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
template <class IntPtrT> struct alignas(8) ProfileData {
- #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
- #include "llvm/ProfileData/InstrProfData.inc"
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+template <class IntPtrT> struct alignas(8) VTableProfileData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
};
// File header structure of the LLVM profile data in raw format.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 25df899b3f3619..77720aba3eb484 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -94,6 +94,22 @@ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \
#undef INSTR_PROF_DATA
/* INSTR_PROF_DATA end. */
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(
+ const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash,
+ ConstantInt::get(llvm::Type::getInt64Ty(Ctx),
+ IndexedInstrProf::ComputeHash(PGOVTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::PointerType::getUnqual(Ctx),
+ VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize,
+ ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
+ VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
/* This is an internal data structure used by value profiler. It
* is defined here to allow serialization code sharing by LLVM
@@ -145,6 +161,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta,
(uintptr_t)BitmapBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -186,13 +204,14 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
/* For memory intrinsic functions size profiling. */
VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
/* These two kinds must be the last to be
* declared. This is to make sure the string
* array created with the template can be
* indexed with the kind value.
*/
VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
#undef VALUE_PROF_KIND
/* VALUE_PROF_KIND end */
@@ -267,7 +286,6 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
#undef COVMAP_HEADER
/* COVMAP_HEADER end. */
-
#ifdef INSTR_PROF_SECT_ENTRY
#define INSTR_PROF_DATA_DEFINED
INSTR_PROF_SECT_ENTRY(IPSK_data, \
@@ -282,12 +300,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \
INSTR_PROF_SECT_ENTRY(IPSK_name, \
INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+ INSTR_PROF_VNAME_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vals, \
INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
INSTR_PROF_VALS_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+ INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+ INSTR_PROF_VTAB_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -307,7 +331,6 @@ INSTR_PROF_SECT_ENTRY(IPSK_covname, \
#undef INSTR_PROF_SECT_ENTRY
#endif
-
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED
@@ -347,7 +370,7 @@ typedef struct ValueProfRecord {
/*!
* Return the number of value sites.
*/
- uint32_t getNumValueSites() const { return NumValueSites; }
+ uint32_t getNumValueSites() const { return NumValueSites; }
/*!
* Read data from this record and save it to Record.
*/
@@ -479,7 +502,6 @@ getValueProfRecordHeaderSize(uint32_t NumValueSites);
#undef INSTR_PROF_VALUE_PROF_DATA
#endif /* INSTR_PROF_VALUE_PROF_DATA */
-
#ifdef INSTR_PROF_COMMON_API_IMPL
#define INSTR_PROF_DATA_DEFINED
#ifdef __cplusplus
@@ -663,9 +685,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 9
+#define INSTR_PROF_RAW_VERSION 10
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 11
+#define INSTR_PROF_INDEX_VERSION 12
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 6
@@ -703,10 +725,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
than WIN32 */
#define INSTR_PROF_DATA_COMMON __llvm_prf_data
#define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
#define INSTR_PROF_BITS_COMMON __llvm_prf_bits
#define INSTR_PROF_VALS_COMMON __llvm_prf_vals
#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
#define INSTR_PROF_COVMAP_COMMON __llvm_covmap
#define INSTR_PROF_COVFUN_COMMON __llvm_covfun
#define INSTR_PROF_COVDATA_COMMON __llvm_covdata
@@ -717,10 +741,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
*/
#define INSTR_PROF_DATA_COFF ".lprfd$M"
#define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfvn$M"
#define INSTR_PROF_CNTS_COFF ".lprfc$M"
#define INSTR_PROF_BITS_COFF ".lprfb$M"
#define INSTR_PROF_VALS_COFF ".lprfv$M"
#define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
#define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
#define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
/* Since cov data and cov names sections are not allocated, we don't need to
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 87f15639a2c3c9..c1edd7afb75bd7 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -326,12 +326,16 @@ class RawInstrProfReader : public InstrProfReader {
uint64_t NamesDelta;
const RawInstrProf::ProfileData<IntPtrT> *Data;
const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
+ const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
+ const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
const char *CountersStart;
const char *CountersEnd;
const char *BitmapStart;
const char *BitmapEnd;
const char *NamesStart;
const char *NamesEnd;
+ const char *VNamesStart = nullptr;
+ const char *VNamesEnd = nullptr;
// After value profile is all read, this pointer points to
// the header of next profile data (if exists)
const uint8_t *ValueDataStart;
@@ -622,6 +626,12 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
InstrProfKind getProfileKind() const override;
Error populateSymtab(InstrProfSymtab &Symtab) override {
+ // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
+ // arrays/maps. Since there are data sources other than 'HashTable' that
+ // populate a symtab, it might make sense to have something like this:
+ // 1. Let each data source populate Symtab and init the arrays/maps without
+ // calling 'finalizeSymtab'
+ // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
return Symtab.create(HashTable->keys());
}
};
@@ -656,6 +666,16 @@ class IndexedInstrProfReader : public InstrProfReader {
std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
/// MemProf frame profile data on-disk indexed via frame id.
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+ /// The reader itself doesn't decompress vtable names. A compiler that reads
+ /// indexed profiles could construct symtab from module IR so it doesn't need
+ /// the decompressed names.
+ /// When a symtab is constructed from profiles by llvm-profdata, the list of
+ /// names could be decompressed based on `VTableNamePtr` and
+ /// `CompressedVTableNamesLen`.
+ /// VTableNamePtr points to the beginning of compressed vtable names.
+ const char *VTableNamePtr = nullptr;
+ /// The length of compressed vtable names.
+ uint64_t CompressedVTableNamesLen = 0;
/// Total size of binary ids.
uint64_t BinaryIdsSize{0};
/// Start address of binary id length and data pairs.
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 047b14f223bd94..049fa36bb53f5c 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -63,6 +63,9 @@ class InstrProfWriter {
// List of binary ids.
std::vector<llvm::object::BuildID> BinaryIds;
+ // Read the vtable names from raw instr profile reader.
+ StringSet<> VTableNames;
+
// An enum describing the attributes of the profile.
InstrProfKind ProfileKind = InstrProfKind::Unknown;
// Use raw pointer here for the incomplete type object.
@@ -84,6 +87,7 @@ class InstrProfWriter {
void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) {
addRecord(std::move(I), 1, Warn);
}
+ void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); }
/// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the
/// total number of temporal profiling traces the source has seen.
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ebfa1c8fc08e1c..ab53717eb889a0 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -45,6 +45,10 @@ static cl::opt<unsigned>
cl::desc("Max number of promotions for a single indirect "
"call callsite"));
+cl::opt<unsigned> MaxNumVTableAnnotations(
+ "icp-max-num-vtables", cl::init(6), cl::Hidden,
+ cl::desc("Max number of vtables annotated for a vtable load instruction."));
+
ICallPromotionAnalysis::ICallPromotionAnalysis() {
ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
}
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 1f15e94783240a..3ad0bab827a512 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -82,6 +82,8 @@ static cl::opt<std::string> ModuleSummaryDotFile(
extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -124,6 +126,24 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
Worklist.push_back(Operand);
}
}
+
+ const Instruction *I = dyn_cast<Instruction>(CurUser);
+ if (I) {
+ uint32_t ActualNumValueData = 0;
+ uint64_t TotalCount = 0;
+ // MaxNumVTableAnnotations is the maximum number of vtables annotated on
+ // the instruction.
+ auto ValueDataArray =
+ getValueProfDataFromInst(*I, IPVK_VTableTarget, MaxNumVTableAnnotations,
+ ActualNumValueData, TotalCount);
+
+ if (ValueDataArray.get()) {
+ for (uint32_t j = 0; j < ActualNumValueData; j++) {
+ RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */
+ ValueDataArray[j].Value));
+ }
+ }
+ }
return HasBlockAddress;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 13be0b0c3307fb..7686e32b69305b 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -199,7 +199,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
for (const auto &GUIDSummaryLists : *Index)
// Examine all summaries for this GUID.
for (auto &Summary : GUIDSummaryLists.second.SummaryList)
- if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+ if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) {
// For each call in the function summary, see if the call
// is to a GUID (which means it is for an indirect call,
// otherwise we would have a Value for it). If so, synthesize
@@ -207,6 +207,15 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
for (auto &CallEdge : FS->calls())
if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue())
assignValueId(CallEdge.first.getGUID());
+
+ // For each referenced variable in the function summary, see if the
+ // variable is represented by a GUID (as opposed to a symbol for a
+ // declaration or definition in the module). If so, synthesize a
+ // value id.
+ for (auto &RefEdge : FS->refs())
+ if ((!RefEdge.haveGVs() || !RefEdge.getValue()))
+ assignValueId(RefEdge.getGUID());
+ }
}
protected:
@@ -4071,7 +4080,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
NameVals.push_back(SpecialRefCnts.second); // worefcnt
for (auto &RI : FS->refs())
- NameVals.push_back(VE.getValueID(RI.getValue()));
+ NameVals.push_back(getValueId(RI));
const bool UseRelBFRecord =
WriteRelBFToSummary && !F.hasProfileData() &&
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 2640027455e0da..91e79e8b2e9add 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -219,6 +219,12 @@ cl::opt<bool> DoInstrProfNameCompression(
"enable-name-compression",
cl::desc("Enable name/filename string compression"), cl::init(true));
+cl::opt<bool> EnableVTableValueProfiling(
+ "enable-vtable-value-profiling", cl::init(false),
+ cl::desc("If true, the virtual table address will be instrumented to know "
+ "the types of a C++ pointer. The information is used in indirect "
+ "call promotion to do selective vtable-based comparison."));
+
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
Triple::ObjectFormatType OF,
bool AddSegmentInfo) {
@@ -378,6 +384,13 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
}
+std::string getPGOName(const GlobalVariable &V, bool InLTO) {
+ // PGONameMetadata should be set by compiler at profile use time
+ // and read by symtab creation to look up symbols corresponding to
+ // an MD5 hash.
+ return getIRPGOObjectName(V, InLTO, nullptr /* PGONameMetadata */);
+}
+
// See getIRPGOFuncName() for a discription of the format.
std::pair<StringRef, StringRef>
getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
@@ -460,6 +473,17 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
return E;
}
+
+ SmallVector<MDNode *, 2> Types;
+ for (GlobalVariable &G : M.globals()) {
+ if (!G.hasName())
+ continue;
+ Types.clear();
+ G.getMetadata(LLVMContext::MD_type, Types);
+ if (!Types.empty()) {
+ MD5VTableMap.emplace_back(G.getGUID(), &G);
+ }
+ }
Sorted = false;
finalizeSymtab();
return Error::success();
@@ -518,6 +542,25 @@ Error InstrProfSymtab::create(StringRef NameStrings) {
std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1));
}
+Error InstrProfSymtab::create(StringRef FuncNameStrings,
+ StringRef VTableNameStrings) {
+ if (Error E = readAndDecodeStrings(FuncNameStrings,
+ std::bind(&InstrProfSymtab::addFuncName,
+ this, std::placeholders::_1)))
+ return E;
+
+ return readAndDecodeStrings(
+ VTableNameStrings,
+ std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
+}
+
+Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
+ StringRef CompressedVTableStrings) {
+ return readAndDecodeStrings(
+ CompressedVTableStrings,
+ std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
+}
+
Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
if (Error E = addFuncName(PGOFuncName))
return E;
@@ -550,6 +593,28 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
return Error::success();
}
+uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
+ finalizeSymtab();
+ auto It = lower_bound(
+ VTableAddrRangeToMD5Map, Address,
+ [](std::pair<std::pair<uint64_t, uint64_t>, uint64_t> VTableRangeAddr,
+ uint64_t Addr) {
+ // Find the first address range whose end address is larger than
+ // `Addr`. Smaller-than-or-equal-to is used because a profiled address
+ // within a vtable should be in [start-address, end-address).
+ return VTableRangeAddr.first.second <= Addr;
+ });
+
+ // Returns the MD5 hash if Address is within the address range of an entry.
+ if (It != VTableAddrRangeToMD5Map.end() && It->first.first <= Address) {
+ return It->second;
+ }
+ // The virtual table address collected from value profiler could be defined
+ // in another module that is not instrumented. Force the value to be 0 in
+ // this case.
+ return 0;
+}
+
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
finalizeSymtab();
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
@@ -626,6 +691,17 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
NameStrs, compression::zlib::isAvailable() && doCompression, Result);
}
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+ std::string &Result, bool doCompression) {
+ std::vector<std::string> VTableNameStrs;
+ for (auto *VTable : VTables) {
+ VTableNameStrs.push_back(getPGOName(*VTable));
+ }
+ return collectGlobalObjectNameStrings(
+ VTableNameStrs, compression::zlib::isAvailable() && doCompression,
+ Result);
+}
+
void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
uint64_t FuncSum = 0;
Sum.NumEntries += Counts.size();
@@ -888,6 +964,9 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
if (ValueKind == IPVK_IndirectCallTarget)
return SymTab->getFunctionHashFromAddress(Value);
+ if (ValueKind == IPVK_VTableTarget)
+ return SymTab->getVTableHashFromAddress(Value);
+
return Value;
}
@@ -1181,6 +1260,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
ArrayRef<InstrProfValueData> VDs,
uint64_t Sum, InstrProfValueKind ValueKind,
uint32_t MaxMDCount) {
+ if (VDs.empty())
+ return;
LLVMContext &Ctx = M.getContext();
MDBuilder MDHelper(Ctx);
SmallVector<Metadata *, 3> Vals;
@@ -1206,46 +1287,44 @@ void annotateValueSite(Module &M, Instruction &Inst,
Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals));
}
-bool getValueProfDataFromInst(const Instruction &Inst,
- InstrProfValueKind ValueKind,
- uint32_t MaxNumValueData,
- InstrProfValueData ValueData[],
- uint32_t &ActualNumValueData, uint64_t &TotalC,
- bool GetNoICPValue) {
+MDNode *mayHaveValueProfileOfKind(const Instruction &Inst,
+ InstrProfValueKind ValueKind) {
MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
if (!MD)
- return false;
+ return nullptr;
- unsigned NOps = MD->getNumOperands();
+ if (MD->getNumOperands() < 5)
+ return nullptr;
- if (NOps < 5)
- return false;
-
- // Operand 0 is a string tag "VP":
MDString *Tag = cast<MDString>(MD->getOperand(0));
- if (!Tag)
- return false;
-
- if (!Tag->getString().equals("VP"))
- return false;
+ if (!Tag || !Tag->getString().equals("VP"))
+ return nullptr;
// Now check kind:
ConstantInt *KindInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
if (!KindInt)
- return false;
+ return nullptr;
if (KindInt->getZExtValue() != ValueKind)
- return false;
+ return nullptr;
+ return MD;
+}
+
+static bool getValueProfDataFromInstImpl(const MDNode *const MD,
+ const uint32_t MaxNumDataWant,
+ InstrProfValueData ValueData[],
+ uint32_t &ActualNumValueData,
+ uint64_t &TotalC, bool GetNoICPValue) {
+ const unsigned NOps = MD->getNumOperands();
// Get total count
ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
if (!TotalCInt)
return false;
TotalC = TotalCInt->getZExtValue();
-
ActualNumValueData = 0;
for (unsigned I = 3; I < NOps; I += 2) {
- if (ActualNumValueData >= MaxNumValueData)
+ if (ActualNumValueData >= MaxNumDataWant)
break;
ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
ConstantInt *Count =
@@ -1262,6 +1341,36 @@ bool getValueProfDataFromInst(const Instruction &Inst,
return true;
}
+std::unique_ptr<InstrProfValueData[]>
+getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
+ uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
+ uint64_t &TotalC, bool GetNoICPValue) {
+ MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind);
+ if (!MD)
+ return nullptr;
+ auto ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumValueData);
+ if (!getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueDataArray.get(),
+ ActualNumValueData, TotalC, GetNoICPValue))
+ return nullptr;
+ return ValueDataArray;
+}
+
+// FIXME: Migrate existing callers to the function above that returns an
+// array.
+bool getValueProfDataFromInst(const Instruction &Inst,
+ InstrProfValueKind ValueKind,
+ uint32_t MaxNumValueData,
+ InstrProfValueData ValueData[],
+ uint32_t &ActualNumValueData, uint64_t &TotalC,
+ bool GetNoICPValue) {
+ MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind);
+ if (!MD)
+ return false;
+ return getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueData,
+ ActualNumValueData, TotalC,
+ GetNoICPValue);
+}
+
MDNode *getPGOFuncNameMetadata(const Function &F) {
return F.getMetadata(getPGOFuncNameMetadataName());
}
@@ -1278,8 +1387,8 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
F.setMetadata(getPGOFuncNameMetadataName(), N);
}
-bool needsComdatForCounter(const Function &F, const Module &M) {
- if (F.hasComdat())
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M) {
+ if (GV.hasComdat())
return true;
if (!Triple(M.getTargetTriple()).supportsCOMDAT())
@@ -1295,7 +1404,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
// available_externally functions will end up being duplicated in raw profile
// data. This can result in distorted profile as the counts of those dups
// will be accumulated by the profile merger.
- GlobalValue::LinkageTypes Linkage = F.getLinkage();
+ GlobalValue::LinkageTypes Linkage = GV.getLinkage();
if (Linkage != GlobalValue::ExternalWeakLinkage &&
Linkage != GlobalValue::AvailableExternallyLinkage)
return false;
@@ -1451,7 +1560,7 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
continue;
- char ProfileKindName[20];
+ char ProfileKindName[20] = {0};
switch (I) {
case IPVK_IndirectCallTarget:
strncpy(ProfileKindName, "IndirectCall", 19);
@@ -1459,6 +1568,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
case IPVK_MemOPSize:
strncpy(ProfileKindName, "MemOP", 19);
break;
+ case IPVK_VTableTarget:
+ strncpy(ProfileKindName, "VTable", 19);
+ break;
default:
snprintf(ProfileKindName, 19, "VP[%d]", I);
break;
@@ -1523,9 +1635,12 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
// When a new field is added in the header add a case statement here to
// populate it.
static_assert(
- IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
"Please update the reading code below if a new field has been added, "
"if not add a case statement to fall through to the latest version.");
+ case 12ull:
+ H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+ [[fallthrough]];
case 11ull:
[[fallthrough]];
case 10ull:
@@ -1551,10 +1666,13 @@ size_t Header::size() const {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
+ case 12ull:
+ return offsetOf(&Header::VTableNamesOffset) +
+ sizeof(Header::VTableNamesOffset);
case 11ull:
[[fallthrough]];
case 10ull:
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 0d8d43daae960b..4ef6823381749e 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -366,6 +366,14 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
return E;
Value = IndexedInstrProf::ComputeHash(VD.first);
}
+ } else if (ValueKind == IPVK_VTableTarget) {
+ if (InstrProfSymtab::isExternalSymbol(VD.first)) {
+ Value = 0;
+ } else {
+ if (Error E = Symtab->addVTableName(VD.first))
+ return E;
+ Value = IndexedInstrProf::ComputeHash(VD.first);
+ }
} else {
READ_NUM(VD.first, Value);
}
@@ -533,7 +541,8 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
- if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
+ if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart),
+ StringRef(VNamesStart, VNamesEnd - VNamesStart)))
return error(std::move(E));
for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
const IntPtrT FPtr = swap(I->FunctionPointer);
@@ -541,6 +550,21 @@ Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
continue;
Symtab.mapAddress(FPtr, swap(I->NameRef));
}
+
+ if (VTableBegin != nullptr && VTableEnd != nullptr) {
+ for (const RawInstrProf::VTableProfileData<IntPtrT> *I = VTableBegin;
+ I != VTableEnd; ++I) {
+ const IntPtrT VPtr = swap(I->VTablePointer);
+ if (!VPtr)
+ continue;
+ // Map both begin and end address to the name hash, since the instrumented
+ // address could be somewhere in the middle.
+ // VPtr is of type uint32_t or uint64_t, so 'VPtr + swap(I->VTableSize)'
+ // marks the end address of the vtable.
+ Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize),
+ swap(I->VTableNameHash));
+ }
+ }
return success();
}
@@ -582,10 +606,17 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto NumBitmapBytes = swap(Header.NumBitmapBytes);
auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
auto NamesSize = swap(Header.NamesSize);
+ auto VTableNameSize = swap(Header.VNamesSize);
+ auto NumVTables = swap(Header.NumVTables);
ValueKindLast = swap(Header.ValueKindLast);
auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
- auto PaddingSize = getNumPaddingBytes(NamesSize);
+ auto PaddingBytesAfterNames = getNumPaddingBytes(NamesSize);
+ auto PaddingBytesAfterVTableNames = getNumPaddingBytes(VTableNameSize);
+
+ auto VTableSectionSize =
+ NumVTables * sizeof(RawInstrProf::VTableProfileData<IntPtrT>);
+ auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(VTableSectionSize);
// Profile data starts after profile header and binary ids if exist.
ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
@@ -594,7 +625,12 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
CountersOffset + CountersSize + PaddingBytesAfterCounters;
ptrdiff_t NamesOffset =
BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
- ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
+ ptrdiff_t VTableProfDataOffset =
+ NamesOffset + NamesSize + PaddingBytesAfterNames;
+ ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize +
+ PaddingBytesAfterVTableProfData;
+ ptrdiff_t ValueDataOffset =
+ VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames;
auto *Start = reinterpret_cast<const char *>(&Header);
if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
@@ -614,8 +650,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
Start + DataOffset);
DataEnd = Data + NumData;
+ VTableBegin =
+ reinterpret_cast<const RawInstrProf::VTableProfileData<IntPtrT> *>(
+ Start + VTableProfDataOffset);
+ VTableEnd = VTableBegin + NumVTables;
NamesStart = Start + NamesOffset;
NamesEnd = NamesStart + NamesSize;
+ VNamesStart = Start + VTableNameOffset;
+ VNamesEnd = VNamesStart + VTableNameSize;
}
CountersStart = Start + CountersOffset;
@@ -1260,6 +1302,19 @@ Error IndexedInstrProfReader::readHeader() {
"corrupted binary ids");
}
+ if (GET_VERSION(Header->formatVersion()) >= 12) {
+ uint64_t VTableNamesOffset =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(
+ Header->VTableNamesOffset);
+ const unsigned char *Ptr = Start + VTableNamesOffset;
+
+ CompressedVTableNamesLen =
+ support::endian::readNext<uint64_t, llvm::endianness::little,
+ unaligned>(Ptr);
+
+ VTableNamePtr = (const char *)Ptr;
+ }
+
if (GET_VERSION(Header->formatVersion()) >= 10 &&
Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
uint64_t TemporalProfTracesOffset =
@@ -1319,7 +1374,16 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
if (Symtab)
return *Symtab;
- std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
+ std::unique_ptr<InstrProfSymtab> NewSymtab =
+ std::make_unique<InstrProfSymtab>();
+
+ if (Error E = NewSymtab->initVTableNamesFromCompressedStrings(
+ StringRef(VTableNamePtr, CompressedVTableNamesLen))) {
+ auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
+ consumeError(error(ErrCode, Msg));
+ }
+
+ // finalizeSymtab is called inside populateSymtab.
if (Error E = Index->populateSymtab(*NewSymtab)) {
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
consumeError(error(ErrCode, Msg));
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index d65f8fe50313dc..7592c0ffd3272b 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
@@ -455,12 +456,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.MemProfOffset = 0;
Header.BinaryIdOffset = 0;
Header.TemporalProfTracesOffset = 0;
+ Header.VTableNamesOffset = 0;
int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
// Only write out all the fields except 'HashOffset', 'MemProfOffset',
- // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the
- // offset of these fields to allow back patching later.
- for (int I = 0; I < N - 4; I++)
+ // 'BinaryIdOffset', `TemporalProfTracesOffset` and `VTableNamesOffset`. We
+ // need to remember the offset of these fields to allow back patching later.
+ for (int I = 0; I < N - 5; I++)
OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save the location of Header.HashOffset field in \c OS.
@@ -484,6 +486,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
uint64_t TemporalProfTracesOffset = OS.tell();
OS.write(0);
+ uint64_t VTableNamesOffset = OS.tell();
+ OS.write(0);
+
// Reserve space to write profile summary data.
uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -604,6 +609,43 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.writeByte(0);
}
+ // if version >= the version with vtable profile metadata.
+ uint64_t VTableNamesSectionStart = 0;
+ if (IndexedInstrProf::ProfVersion::CurrentVersion >= 12) {
+ VTableNamesSectionStart = OS.tell();
+
+ std::string CompressedVTableNames;
+
+ std::vector<std::string> VTableNameStrs;
+ for (const auto &VTableName : VTableNames.keys()) {
+ VTableNameStrs.push_back(VTableName.str());
+ }
+
+ if (!VTableNameStrs.empty()) {
+ if (Error E = collectGlobalObjectNameStrings(
+ VTableNameStrs, compression::zlib::isAvailable(),
+ CompressedVTableNames))
+ return E;
+ }
+
+ uint64_t CompressedStringLen = CompressedVTableNames.length();
+
+ // Record the length of compressed string.
+ OS.write(CompressedStringLen);
+
+ // Write the chars in compressed strings.
+ for (auto &c : CompressedVTableNames)
+ OS.writeByte(static_cast<uint8_t>(c));
+
+ // Pad up to a multiple of 8.
+ // InstrProfReader could read bytes according to 'CompressedStringLen'.
+ uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+
+ for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) {
+ OS.writeByte(0);
+ }
+ }
+
uint64_t TemporalProfTracesSectionStart = 0;
if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
TemporalProfTracesSectionStart = OS.tell();
@@ -647,6 +689,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Patch the Header.TemporalProfTracesOffset (=0 for profiles without
// traces).
{TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+ {VTableNamesOffset, &VTableNamesSectionStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
(int)(SummarySize / sizeof(uint64_t))},
@@ -699,7 +742,8 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
DenseSet<uint64_t> SeenValues;
for (uint32_t I = 0; I < ND; I++)
- if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second)
+ if ((VK != IPVK_IndirectCallTarget && VK != IPVK_VTableTarget) &&
+ !SeenValues.insert(VD[I].Value).second)
return make_error<InstrProfError>(instrprof_error::invalid_prof);
}
}
@@ -747,7 +791,7 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
OS << ND << "\n";
std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
for (uint32_t I = 0; I < ND; I++) {
- if (VK == IPVK_IndirectCallTarget)
+ if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
OS << Symtab.getFuncOrVarNameIfDefined(VD[I].Value) << ":"
<< VD[I].Count << "\n";
else
@@ -786,6 +830,11 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
}
}
+ for (const auto &VTableName : VTableNames) {
+ if (Error E = Symtab.addVTableName(VTableName.getKey()))
+ return E;
+ }
+
if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
writeTextTemporalProfTraceData(OS, Symtab);
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 7344fea1751719..6a44a32bb34dc9 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -136,11 +136,13 @@ class IndirectCallPromoter {
const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
uint64_t TotalCount, uint32_t NumCandidates);
- // Promote a list of targets for one indirect-call callsite. Return
- // the number of promotions.
- uint32_t tryToPromote(CallBase &CB,
- const std::vector<PromotionCandidate> &Candidates,
- uint64_t &TotalCount);
+ // Promote a list of targets for one indirect-call callsite by comparing
+ // indirect callee with functions. Returns true if there are IR
+ // transformations and false otherwise.
+ bool tryToPromoteWithFuncCmp(
+ CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint32_t NumCandidates);
public:
IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -273,9 +275,10 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
}
// Promote indirect-call to conditional direct-call for one callsite.
-uint32_t IndirectCallPromoter::tryToPromote(
+bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
- uint64_t &TotalCount) {
+ uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint32_t NumCandidates) {
uint32_t NumPromoted = 0;
for (const auto &C : Candidates) {
@@ -287,7 +290,18 @@ uint32_t IndirectCallPromoter::tryToPromote(
NumOfPGOICallPromotion++;
NumPromoted++;
}
- return NumPromoted;
+
+ const bool Changed = (NumPromoted != 0);
+
+ if (Changed) {
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+
+ if (TotalCount != 0)
+ annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
+ TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+ }
+
+ return Changed;
}
// Traverse all the indirect-call callsite and get the value profile
@@ -305,19 +319,8 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
continue;
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
- uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
- if (NumPromoted == 0)
- continue;
-
- Changed = true;
- // Adjust the MD.prof metadata. First delete the old one.
- CB->setMetadata(LLVMContext::MD_prof, nullptr);
- // If all promoted, we don't need the MD.prof metadata.
- if (TotalCount == 0 || NumPromoted == NumVals)
- continue;
- // Otherwise we need update with the un-promoted records back.
- annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted),
- TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+ Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
+ ICallProfDataRef, NumCandidates);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index a19b1408725441..49978dac034e82 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -64,6 +64,9 @@ using namespace llvm;
#define DEBUG_TYPE "instrprof"
namespace llvm {
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
// -profile-correlate=debug-info.
cl::opt<bool> DebugInfoCorrelate(
@@ -196,12 +199,18 @@ class InstrLowerer final {
PerFunctionProfileData() = default;
};
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+ // Key is virtual table variable, value is 'VTableProfData' in the form of
+ // GlobalVariable.
+ DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
/// If runtime relocation is enabled, this maps functions to the load
/// instruction that produces the profile relocation bias.
DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
std::vector<GlobalValue *> CompilerUsedVars;
std::vector<GlobalValue *> UsedVars;
std::vector<GlobalVariable *> ReferencedNames;
+ // The list of virtual table variables for which VTableProfData is
+ // collected.
+ std::vector<GlobalVariable *> ReferencedVTables;
GlobalVariable *NamesVar = nullptr;
size_t NamesSize = 0;
@@ -294,9 +303,15 @@ class InstrLowerer final {
/// Create INSTR_PROF_DATA variable for counters and bitmaps.
void createDataVariable(InstrProfCntrInstBase *Inc);
+ /// Create the profile data variable for a virtual table if not yet created.
+ void getOrCreateVTableProfData(GlobalVariable *GV);
+
/// Emit the section with compressed function names.
void emitNameData();
+ /// Emit the section with compressed vtable names.
+ void emitVTableNames();
+
/// Emit value nodes section for value profiling.
void emitVNodes();
@@ -740,6 +755,15 @@ bool InstrLowerer::lower() {
}
}
+ if (EnableVTableValueProfiling) {
+ for (GlobalVariable &GV : M.globals()) {
+ // Global variables with type metadata are virtual table variables.
+ if (GV.hasMetadata(LLVMContext::MD_type)) {
+ getOrCreateVTableProfData(&GV);
+ }
+ }
+ }
+
for (Function &F : M)
MadeChange |= lowerIntrinsics(&F);
@@ -753,6 +777,7 @@ bool InstrLowerer::lower() {
emitVNodes();
emitNameData();
+ emitVTableNames();
// Emit runtime hook for the cases where the target does not unconditionally
// require pulling in profile runtime, and coverage is enabled on code that is
@@ -1220,6 +1245,129 @@ void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn,
GV->setLinkage(GlobalValue::InternalLinkage);
}
+static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
+ if (!profDataReferencedByCode(*GV->getParent()))
+ return false;
+
+ if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
+ !GV->hasAvailableExternallyLinkage())
+ return true;
+
+ // This avoids the profile data from referencing internal symbols in
+ // COMDAT.
+ if (GV->hasLocalLinkage() && GV->hasComdat())
+ return false;
+
+ return true;
+}
+
+// FIXME: Does symbolic relocation from 'getFuncAddrForProfData' matter here?
+static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
+ auto *Int8PtrTy = PointerType::getUnqual(GV->getContext());
+
+ // Store a nullptr in __profvt_ if a real address shouldn't be used.
+ if (!shouldRecordVTableAddr(GV))
+ return ConstantPointerNull::get(Int8PtrTy);
+
+ return ConstantExpr::getBitCast(GV, Int8PtrTy);
+}
+
+void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
+ assert(!DebugInfoCorrelate &&
+ "Value profiling is not supported with lightweight instrumentation");
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return;
+
+ if (GV->getName().starts_with("llvm.") ||
+ GV->getName().starts_with("__llvm") ||
+ GV->getName().starts_with("__prof"))
+ return;
+
+ // VTableProfData already created
+ auto It = VTableDataMap.find(GV);
+ if (It != VTableDataMap.end() && It->second)
+ return;
+
+ GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+ GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
+
+ // This is to keep consistent with per-function profile data
+ // for correctness.
+ if (TT.isOSBinFormatXCOFF()) {
+ Linkage = GlobalValue::InternalLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
+
+ LLVMContext &Ctx = M.getContext();
+ Type *DataTypes[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+
+ auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
+
+ // Used by INSTR_PROF_VTABLE_DATA MACRO
+ Constant *VTableAddr = getVTableAddrForProfData(GV);
+ const std::string PGOVTableName = getPGOName(*GV);
+ // Record the length of the vtable. This is needed since vtable pointers
+ // loaded from C++ objects might be from the middle of a vtable definition.
+ uint32_t VTableSizeVal =
+ M.getDataLayout().getTypeAllocSize(GV->getValueType());
+
+ Constant *DataVals[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+
+ std::string VarName = getInstrProfVTableVarPrefix().str() + PGOVTableName;
+ auto *Data =
+ new GlobalVariable(M, DataTy, false /* constant */, Linkage,
+ ConstantStruct::get(DataTy, DataVals), VarName);
+
+ Data->setVisibility(Visibility);
+ Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
+ Data->setAlignment(Align(8));
+
+ const bool NeedComdat = needsComdatForCounter(*GV, M);
+
+ // The lambda's GV parameter is the variable that records vtable information.
+ // Place the global variable for per-vtable profile data in a comdat group
+ // if the associated vtable definition is a COMDAT. This makes sure only one
+ // copy of the variable for the vtable will be emitted after linking.
+ auto MaybeSetComdat = [&](GlobalVariable *GV, StringRef GroupName) {
+ bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
+ if (UseComdat) {
+ // Create a new comdat group using the name of the global variable as
+ // opposed to using the comdat group of the vtable.
+ Comdat *C = M.getOrInsertComdat(GroupName);
+ // For ELF, when not using COMDAT, put the vtable profile data into a
+ // nodeduplicate COMDAT which is lowered to a zero-flag section group.
+ // This allows -z start-stop-gc to discard the entire group when the
+ // vtable def is discarded.
+ if (!NeedComdat)
+ C->setSelectionKind(Comdat::NoDeduplicate);
+ GV->setComdat(C);
+ // COFF doesn't allow the comdat group leader to have private linkage, so
+ // upgrade private linkage to internal linkage to produce a symbol table
+ // entry.
+ if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) {
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ }
+ return;
+ }
+ };
+
+ MaybeSetComdat(Data, Data->getName());
+
+ VTableDataMap[GV] = Data;
+
+ ReferencedVTables.push_back(GV);
+
+ // VTable <Hash, Addr> is used by runtime but not referenced by other
+ // sections. Conservatively mark it linker retained.
+ UsedVars.push_back(Data);
+}
+
GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
InstrProfSectKind IPSK) {
GlobalVariable *NamePtr = Inc->getName();
@@ -1633,6 +1781,31 @@ void InstrLowerer::emitNameData() {
NamePtr->eraseFromParent();
}
+void InstrLowerer::emitVTableNames() {
+ if (!EnableVTableValueProfiling || ReferencedVTables.empty())
+ return;
+
+ // Collect the PGO names of referenced vtables and compress them.
+ std::string CompressedVTableNames;
+ if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
+ DoInstrProfNameCompression)) {
+ report_fatal_error(Twine(toString(std::move(E))), false);
+ }
+
+ auto &Ctx = M.getContext();
+ auto *VTableNamesVal = ConstantDataArray::getString(
+ Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
+ GlobalVariable *VTableNamesVar =
+ new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
+ GlobalValue::PrivateLinkage, VTableNamesVal,
+ getInstrProfVTableNamesVarName());
+ VTableNamesVar->setSection(
+ getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
+ VTableNamesVar->setAlignment(Align(1));
+ // Make VTableNames linker retained.
+ UsedVars.push_back(VTableNamesVar);
+}
+
void InstrLowerer::emitRegistration() {
if (!needsRuntimeRegistrationOfSectionRange(TT))
return;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c20fc942eaf0d5..f1aa17de429338 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -327,6 +327,11 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+extern cl::opt<bool> DebugInfoCorrelate;
+
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
@@ -581,6 +586,8 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
NumOfPGOBB += MST.bbInfoSize();
ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
+ if (EnableVTableValueProfiling)
+ ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
} else {
NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 3a129de1acd02d..96b21301ce676f 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -90,9 +90,39 @@ public:
}
};
+///--------------------------- VTableProfilingPlugin ---------------------------
+/// Collects the instructions returned by findVTableAddrs as VP candidates.
+class VTableProfilingPlugin {
+ Function &F;
+
+public:
+ static constexpr InstrProfValueKind Kind = IPVK_VTableTarget;
+
+ VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {}
+
+ void run(std::vector<CandidateInfo> &Candidates) {
+ std::vector<Instruction *> Result = findVTableAddrs(F);
+ for (Instruction *I : Result) {
+ Instruction *InsertPt = I->getNextNonDebugInstruction();
+ // When finding an insertion point, keep PHI and EH pad instructions
+ // before vp intrinsics. This is similar to
+ // `BasicBlock::getFirstInsertionPt`.
+ while (InsertPt && (dyn_cast<PHINode>(InsertPt) || InsertPt->isEHPad()))
+ InsertPt = InsertPt->getNextNonDebugInstruction();
+ // Skip instrumenting the value if InsertPt is the last instruction.
+ // FIXME: Set InsertPt to the end of basic block to instrument the value
+ // if InsertPt is the last instruction.
+ if (InsertPt == nullptr)
+ continue;
+
+ Instruction *AnnotatedInst = I;
+ Candidates.emplace_back(CandidateInfo{I, InsertPt, AnnotatedInst});
+ }
+ }
+};
+
///----------------------- Registration of the plugins -------------------------
/// For now, registering a plugin with the ValueProfileCollector is done by
/// adding the plugin type to the VP_PLUGIN_LIST macro.
-#define VP_PLUGIN_LIST \
- MemIntrinsicPlugin, \
- IndirectCallPromotionPlugin
+#define VP_PLUGIN_LIST \
+ MemIntrinsicPlugin, IndirectCallPromotionPlugin, VTableProfilingPlugin
diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
new file mode 100644
index 00000000000000..ba3ce9a75ee832
--- /dev/null
+++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
@@ -0,0 +1,74 @@
+; Promote at most one function and annotate at most one vtable.
+; As a result, only one value (of each relevant kind) shows up in the function
+; summary.
+
+; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o
+
+; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+
+; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS
+; Round trip it through llvm-as
+; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
+
+; CHECK: <GLOBALVAL_SUMMARY_BLOCK
+; CHECK-NEXT: <VERSION op0=9/>
+; CHECK-NEXT: <FLAGS op0=0/>
+; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
+; that loads vtable pointers.
+; CHECK-NEXT: <VALUE_GUID op0=21 op1=1960855528937986108/>
+; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
+; indirect call instruction.
+; CHECK-NEXT: <VALUE_GUID op0=20 op1=5459407273543877811/>
+; NOTE: vtables and functions from the Derived class are dropped because
+; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
+; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
+; numrefs, rorefcnt, worefcnt,
+; m x valueid,
+; n x (valueid, hotness+tailcall)]
+; CHECK-NEXT: <PERMODULE_PROFILE abbrevid=4 op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=21 op8=20 op9=3/>
+; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function has one BB and an entry count of 150, so the BB is hot according to
+; ProfileSummary, and this is reflected in the bitcode (see llvm-dis output).
+define i32 @_Z4testP4Base(ptr %0) !prof !15 {
+ %2 = load ptr, ptr %0, !prof !16
+ %3 = load ptr, ptr %2
+ %4 = tail call i32 %3(ptr %0), !prof !17
+ ret i32 %4
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 200}
+!6 = !{!"MaxInternalCount", i64 200}
+!7 = !{!"MaxFunctionCount", i64 200}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 990000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+
+!15 = !{!"function_entry_count", i32 150}
+; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
+; 13870436605473471591 is the MD5 hash of _ZTV7Derived
+!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50}
+; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
+; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv
+!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50}
+
+; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
+; global value summaries are printed out in the order that gv's guid increases.
+; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0))
+; DIS: ^1 = gv: (guid: 1960855528937986108)
+; DIS: ^2 = gv: (guid: 5459407273543877811)
+; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965
+; DIS: ^4 = blockcount: 0
diff --git a/llvm/test/Instrumentation/InstrProfiling/coverage.ll b/llvm/test/Instrumentation/InstrProfiling/coverage.ll
index bbf895ea4b34e1..08cbcaa962b765 100644
--- a/llvm/test/Instrumentation/InstrProfiling/coverage.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/coverage.ll
@@ -5,12 +5,12 @@ target triple = "aarch64-unknown-linux-gnu"
@__profn_foo = private constant [3 x i8] c"foo"
; CHECK: @__profc_foo = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
-; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64)
-; BINARY: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_foo to i64),
+; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64)
+; BINARY: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_foo to i64),
@__profn_bar = private constant [3 x i8] c"bar"
; CHECK: @__profc_bar = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
-; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64)
-; BINARY: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_bar to i64),
+; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64)
+; BINARY: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_bar to i64),
; CHECK: @__llvm_prf_nm = {{.*}} section "__llvm_prf_names"
; BINARY: @__llvm_prf_nm ={{.*}} section "__llvm_covnames"
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw
index 5efda10bb98a941c04b6846db05d3691bc36aac0..5d96ba8ac220508002ae9a7cdb0beb13e0a25144 100644
GIT binary patch
delta 133
zcmbQhvVeuNu_!ISs37M**F;W##g0c6JDpbj|Gzn}&24We0|sE4n5oVhFbgO-ajG?I
s0+?~tnzsPN04lGLYj at i_S(ee5^#Dj at awy|$1+XHZ93#{)ux=zi0I(w{I{*Lx
delta 117
zcmZ3$GJ%D&u_!ISs37M*=R{6_L67IVA1SZ;|9^9yv+SKv1_s87mFlblGl86mORZTI
rz>KHXyapf!P<n@?i|n1rx{SuG4Iq)psf at D~z=}Xx86W_x8;K796T>9f
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw b/llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..5adeb774cddd6462bd2d3779a96d9ad5a06d5e23
GIT binary patch
literal 656
zcmZoHO3N=Q$obF700xW at ih+R*#(>fsXncDp|G<9;NPf(`(<%&25s=FS6^fqKW9??N
zGj7VQAPc)yoJ=r%1$-<h`e5o|CjGkF``j#L>TQo!%iMorv-oE~?b`s=moX8dAEXai
zZ{I{cNtM}66M)L!U*_`VDuC*1;77F&?qR5f3mFb--sHb`6Q<7rs&4`TeGkj4d7Hnn
z_QUiEK=mykpl|Z0nI4u#dwXE|9{hu7+(1CzJE&)WLEZg=8Nz`12PO`qVd`OYj%~%Q
z?tUW^4?Qnm9Z%ksIv!_J&iI}=libt)X>)<6?kOE_UqcgL?X%uyC1=kZ5ixZVHa)cJ
q8pk>@hHDd5b}&Q$t%KPG4tuD3VBrh17v_JMy|8eE$;12!lLr8f`l*Eg
literal 0
HcmV?d00001
diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
index 8c6942c0f527bc..1bad0db1b47624 100644
--- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll
+++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
@@ -13,9 +13,9 @@ $foo = comdat any
; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
; CHECK-NOT: __profn__stdin__foo
; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
-; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null
+; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null
; CHECK-NOT: @foo
-; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
+; CHECK-SAME: , ptr null, i32 1, [3 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
; CHECK: @__llvm_prf_nm
; CHECK: @llvm.compiler.used
diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
new file mode 100644
index 00000000000000..edc866e4e4efb5
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S | FileCheck %s --check-prefix=LOWER
+
+; __llvm_prf_vnm stores zlib-compressed vtable names.
+; REQUIRES: zlib
+
+source_filename = "vtable_local.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The test IR is generated based on the following C++ program.
+; Base1 has external linkage and Base2 has local linkage.
+; class Derived uses multiple inheritance so its virtual table
+; global variable contains two vtables. func1 is loaded from
+; the vtable compatible with class Base1, and func2 is loaded
+; from the vtable compatible with class Base2.
+
+; class Base1 {
+; public:
+; virtual int func1(int a) ;
+; };
+;
+; namespace {
+; class Base2 {
+; public:
+; __attribute__((noinline)) virtual int func2(int a) {
+; return a;
+; }
+; };
+; }
+
+; class Derived : public Base1, public Base2 {
+; public:
+; Derived(int c) : v(c) {}
+; private:
+; int v;
+; };
+;
+; Derived* createType();
+
+; int func(int a) {
+; Derived* d = createType();
+; return d->func2(a) + d->func1(a);
+; }
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !0, !type !3, !type !6, !type !8, !type !10
+@_ZTV5Base1 = available_externally constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei] }, !type !0
+@_ZTVN12_GLOBAL__N_15Base2E = internal constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !11, !type !8; !vcall_visibility !12
+@llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata"
+
+; GEN: __llvm_profile_raw_version = comdat any
+; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat
+; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci"
+
+; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate
+; LOWER: $"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate
+; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 1419990121885302679, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @__llvm_prf_vnm = private constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8E\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19:\12o", section "__llvm_prf_vtabnames", align 1
+; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata"
+
+define i32 @_Z4funci(i32 %a) {
+entry:
+ %call = call ptr @_Z10createTypev()
+ %add.ptr = getelementptr inbounds i8, ptr %call, i64 8
+ %vtable = load ptr, ptr %add.ptr
+; GEN: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64
+; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash:[0-9]+]], i64 [[P1]], i32 2, i32 0)
+; LOWER: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64
+; LOWER: call void @__llvm_profile_instrument_target(i64 [[P1]], ptr @__profd__Z4funci, i32 2)
+ %vfunc1 = load ptr, ptr %vtable
+ %call1 = call i32 %vfunc1(ptr %add.ptr, i32 %a)
+ %vtable2 = load ptr, ptr %call
+; GEN: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64
+; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash]], i64 [[P2]], i32 2, i32 1)
+; LOWER: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64
+; LOWER: call void @__llvm_profile_instrument_target(i64 [[P2]], ptr @__profd__Z4funci, i32 3)
+ %vfunc2 = load ptr, ptr %vtable2
+ %call4 = call i32 %vfunc2(ptr %call, i32 %a)
+ %add = add nsw i32 %call1, %call4
+ ret i32 %add
+}
+
+declare ptr @_Z10createTypev()
+declare i32 @_ZN12_GLOBAL__N_15Base25func2Ei(ptr %this, i32 %a)
+declare i32 @_ZN5Base15func1Ei(ptr, i32)
+
+!0 = !{i64 16, !"_ZTS5Base1"}
+!3 = !{i64 16, !"_ZTS7Derived"}
+!6 = !{i64 40, !7}
+!7 = distinct !{}
+!8 = !{i64 16, !9}
+!9 = distinct !{}
+!10 = !{i64 40, !9}
+!11 = !{i64 16, !7}
diff --git a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw
index 9cd225587c92511e99f3497ce1d5f47c6fc5f0af..a5dcc9fb22e2e125eccd0ad52a509a84e218781a 100644
GIT binary patch
delta 40
ycmV+ at 0N4NE5AY8OfpTVVa&T<_3Xus<4&W)m$E2$N|IVI0I9pYdP6HaTaBv5DToMxi
delta 39
vcmeys|A3#fu_!ISs37M*=R{6_K?|$bHJ=*(|L<GyrHQwmfq`*jWjQ+lUJ(&8
diff --git a/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw b/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw
index 9966729d92ddc33bf89eeb3fee87215bbabbbef1..4d36ffcf5e05b084cf0d1e04fe3933f80b0b1749 100644
GIT binary patch
delta 40
ycmV+ at 0N4Mp55NxzfpTVVa&T<_3Xus<4&eFQuj8rz|DDYvP#jj1P6HaTa6kus4H8fQ
delta 39
vcmX at Wzk#2#u_!ISs37M*=R{6_L5r?)Gq*SV|KArNnC4N>z`(e%(w!XuMI#TR
diff --git a/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh b/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
new file mode 100755
index 00000000000000..89c3e642ac7ef7
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+if [ $# -lt 1 ]; then
+ echo "Path to clang++ required!"
+ echo "Usage: update_vtable_value_prof_inputs.sh /path/to/updated/clang++"
+ exit 1
+else
+ CLANG=$1
+fi
+
+
+# Remember current directory.
+CURDIR=$PWD
+
+# Allows the script to be invoked from other directories.
+OUTDIR=$(dirname $(realpath -s $0))
+echo $OUTDIR
+
+cd $OUTDIR
+
+# vtable_prof.cc has the following class hierarchy:
+# class Base
+# ├── class Derived1
+# └── class Derived2
+# Derived1 is a class in the global namespace and Derived2 is in anonymous
+# namespace for test coverage. Overridden virtual methods are annotated as
+# `noinline` so the call sites remain indirect calls for testing purposes.
+cat > vtable_prof.cc << EOF
+#include <cstdlib>
+#include <cstdio>
+
+class Base {
+ public:
+ virtual int func1(int a, int b) = 0;
+ virtual int func2(int a, int b) = 0;
+};
+
+class Derived1 : public Base {
+ public:
+ __attribute__((noinline))
+ int func1(int a, int b) override
+ {
+ return a + b;
+ }
+
+ __attribute__((noinline))
+ int func2(int a, int b) override {
+ return a * b;
+ }
+};
+
+namespace {
+class Derived2 : public Base {
+ public:
+ __attribute__((noinline))
+ int func1(int a, int b) override {
+ return a - b;
+ }
+
+ __attribute__((noinline))
+ int func2(int a, int b) override {
+ return a * (a - b);
+ }
+};
+} // namespace
+
+__attribute__((noinline)) Base* createType(int a) {
+ Base* base = nullptr;
+ if (a % 4 == 0)
+ base = new Derived1();
+ else
+ base = new Derived2();
+ return base;
+}
+
+
+int main(int argc, char** argv) {
+ int sum = 0;
+ for (int i = 0; i < 1000; i++) {
+ int a = rand();
+ int b = rand();
+ Base* ptr = createType(i);
+ sum += ptr->func1(a, b) + ptr->func2(b, a);
+ }
+ printf("sum is %d\n", sum);
+ return 0;
+}
+EOF
+
+
+# Clean up temporary files on exit and return to original directory.
+cleanup() {
+ rm -f vtable_prof
+ rm -f vtable_prof.cc
+ cd $CURDIR
+}
+trap cleanup EXIT
+
+FLAGS="-fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling"
+
+${CLANG} ${FLAGS} vtable_prof.cc -o vtable_prof
+env LLVM_PROFILE_FILE=vtable-value-prof-basic.profraw ./vtable_prof
diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..322c8dcd73f935564ca6775962dfa7cbfbdbeda6
GIT binary patch
literal 960
zcmZoHO3N=Q$obF300xW at ih+R*#(>fsXnb^T5>(*8e+cNvxiv$7)&KuHuBfD62xVZf
zg~~Ib(b2Pdtlf-u#!a~uWMQ|8lL at 9DX8r>{Rj_^-AEqD1|8=$Zxmn88+a9f!x&Opw
z at lSx-50h8mhv;8$5<-LY!!Srq-$Xr0mDx=oZSOC0`En(o>30x7wI3cpPzx3^9MrtY
zfA1ztzXqEA00R1p<k**G8rrkL^ndsVF#u{jg8~8l5A7N^IQjV|!u0Pz({DgP|2t at a
z{(@Tgf*Hbq`4c7%qhab{^fJcyid)_NMkXG5UcNe>yeoA)&ZeC4J#!|xr~lLD0#Dsj
zI^Mp9CcfHdz0FF_o;f07%G5b+s*2jvCC9`?o-Ti>svgZODk*hz_wOIpmcNc1|9w>Y
z&=v_r9wlGy6JiY2N)|B?_Z(ljx&GM=lcpW at koZu5y4Nb%QFreJNezYuG;wQ?v$#0n
zPDs(w_VhXB*{iQ{%Kx&r-?lz>uU}~PUXj|##Nd5hxD9Cb3Lu81HE?=?a$xBJ79Ozp
jfrSSwzG3pP at Q2BR%>tPXHSYw}KQQ$IP=CPG6Oso21zGjH
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
new file mode 100644
index 00000000000000..ec85dc4c3b12f0
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
@@ -0,0 +1,73 @@
+# IR level Instrumentation Flag
+:ir
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+_Z10createTypei
+# Func Hash:
+146835647075900052
+# Num Counters:
+2
+# Counter Values:
+750
+250
+
+_ZN8Derived15func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+_ZN8Derived15func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+main
+# Func Hash:
+1124236338992350536
+# Num Counters:
+2
+# Counter Values:
+1000
+1
+# Num Value Kinds:
+2
+# ValueKind = IPVK_IndirectCallTarget:
+0
+# NumValueSites:
+2
+2
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750
+_ZN8Derived15func1Eii:250
+2
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750
+_ZN8Derived15func2Eii:250
+# ValueKind = IPVK_VTableTarget:
+2
+# NumValueSites:
+2
+2
+/path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+_ZTV8Derived1:250
+2
+/path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+_ZTV8Derived1:250
diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index eda63203a304a4..61881b69cfd5c0 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -10,10 +10,12 @@
// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
// There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes.
// 2 * 8 binary ID sizes
// + 2 * 20 binary IDs (of size 20)
@@ -32,6 +34,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Binary IDs - There are only two in this case that are 20 bytes.
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
index 38b838e0d100af..316a9a4c9df4ce 100644
--- a/llvm/test/tools/llvm-profdata/large-binary-id-size.test
+++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
@@ -1,5 +1,5 @@
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Check for a corrupted size being too large past the end of the file.
RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index c967e850dbe352..8b686d5c50cb74 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -10,10 +10,12 @@
// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Data Section
//
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index 2e747f81a6bfae..089afad4206223 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -10,10 +10,12 @@
// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Data Section
//
diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
index 3c23bc7dd0f7f9..e404ba4210cc14 100644
--- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
+++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
@@ -10,10 +10,12 @@
// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\6\0\1\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\6\0\2\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Data Section
//
diff --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
index 4a5c42843ff4dd..ee54bfb9785678 100644
--- a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
+++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
@@ -1,5 +1,5 @@
RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
// We should fail on this because the binary IDs is not a multiple of 8 bytes.
RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -10,6 +10,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
// Binary IDs - There are only two in this case that are 20 bytes.
RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
index 2a92575ee34075..dfa163f1f3439a 100644
--- a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
+++ b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
@@ -15,6 +15,8 @@ RUN: printf '\0\0\0\0\0\0\0\20' >> %t
RUN: printf '\0\0\0\1\0\4\0\0' >> %t
RUN: printf '\0\0\0\2\0\4\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: not llvm-profdata show %t -o /dev/null 2>&1 | FileCheck %s
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
index 8220361df6cfa6..63782c8b94d4a5 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
@@ -1,5 +1,6 @@
+// Header
RUN: printf '\377lprofR\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\11' >> %t
+RUN: printf '\0\0\0\0\0\0\0\12' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +13,8 @@ RUN: printf '\0\0\0\0\1\0\0\0' >> %t
RUN: printf '\0\0\0\0\3\0\0\0' >> %t
RUN: printf '\0\0\0\0\2\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\134\370\302\114\333\030\275\254' >> %t
RUN: printf '\0\0\0\0\0\0\0\1' >> %t
@@ -20,9 +23,8 @@ RUN: printf '\3\0\0\0' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\344\023\165\112\031\035\265\067' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
@@ -31,9 +33,8 @@ RUN: printf '\2\xff\xff\xd3' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\2' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\023' >> %t
RUN: printf '\0\0\0\0\0\0\0\067' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
index 9352ae132380d6..e9569bec1178bd 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
@@ -1,5 +1,5 @@
RUN: printf '\201Rforpl\377' > %t
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\1\0\0\0\0' >> %t
RUN: printf '\0\0\0\3\0\0\0\0' >> %t
RUN: printf '\0\0\0\2\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\254\275\030\333\114\302\370\134' >> %t
RUN: printf '\1\0\0\0\0\0\0\0' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\0\3' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\067\265\035\031\112\165\023\344' >> %t
RUN: printf '\02\0\0\0\0\0\0\0' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\xd3\xff\xff\2' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0' >> %t
RUN: printf '\2\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\023\0\0\0\0\0\0\0' >> %t
RUN: printf '\067\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
index c3e995add6ff2e..0bc579eec58abb 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
@@ -1,5 +1,5 @@
RUN: printf '\377lprofr\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\11' >> %t
+RUN: printf '\0\0\0\0\0\0\0\12' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\1\0\4\0\0' >> %t
RUN: printf '\0\0\0\3\0\4\0\0' >> %t
RUN: printf '\0\0\0\2\0\4\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\134\370\302\114\333\030\275\254' >> %t
RUN: printf '\0\0\0\0\0\0\0\1' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\0\3\0\4\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\344\023\165\112\031\035\265\067' >> %t
RUN: printf '\0\0\0\0\0\0\0\02' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\0\0\0\3\0\3\xff\xc3' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\02' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\023' >> %t
RUN: printf '\0\0\0\0\0\0\0\067' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
index 0b3ef2a89abe52..ca9ea54c3f0146 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
@@ -1,5 +1,5 @@
RUN: printf '\201rforpl\377' > %t
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t
RUN: printf '\0\0\4\0\3\0\0\0' >> %t
RUN: printf '\0\0\4\0\2\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\254\275\030\333\114\302\370\134' >> %t
RUN: printf '\1\0\0\0\0\0\0\0' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\4\0\3\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\067\265\035\031\112\165\023\344' >> %t
RUN: printf '\02\0\0\0\0\0\0\0' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\xc3\xff\3\0\3\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\02\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
RUN: printf '\023\0\0\0\0\0\0\0' >> %t
RUN: printf '\067\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-two-profiles.test b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
index f4a9aa8e1bbc3a..70a4210dea9f84 100644
--- a/llvm/test/tools/llvm-profdata/raw-two-profiles.test
+++ b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
@@ -1,5 +1,5 @@
RUN: printf '\201rforpl\377' > %t-foo.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -12,6 +12,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\254\275\030\333\114\302\370\134' >> %t-foo.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -26,7 +28,7 @@ RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw
RUN: printf '\201rforpl\377' > %t-bar.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
@@ -39,6 +41,8 @@ RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\067\265\035\031\112\165\023\344' >> %t-bar.profraw
RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
new file mode 100644
index 00000000000000..fb070dc97a4d8a
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
@@ -0,0 +1,124 @@
+To update the inputs used below, run
+Inputs/update_vtable_value_prof_inputs.sh /path/to/updated/clang++
+
+; Raw profiles store zlib-compressed vtable names, so the raw profile reader
+; needs zlib to decompress them.
+; REQUIRES: zlib
+
+; RUN: rm -rf %t && mkdir %t && cd %t
+
+Show profile data from raw profiles.
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables %p/Inputs/vtable-value-prof-basic.profraw | FileCheck %s --check-prefix=RAW
+
+Generate indexed profile from raw profile and show the data.
+RUN: llvm-profdata merge %p/Inputs/vtable-value-prof-basic.profraw -o indexed.profdata
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED
+
+Generate text profile from raw profile and show the data.
+RUN: llvm-profdata merge --text %p/Inputs/vtable-value-prof-basic.profraw -o vtable-value-prof-basic.proftext
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text vtable-value-prof-basic.proftext | FileCheck %s --check-prefix=ICTEXT
+
+RAW: Counters:
+RAW-NEXT: main:
+RAW-NEXT: Hash: 0x0f9a16fe6d398548
+RAW-NEXT: Counters: 2
+RAW-NEXT: Indirect Call Site Count: 2
+RAW-NEXT: Number of instrumented vtables: 2
+RAW-NEXT: Indirect Target Results:
+RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
+RAW-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
+RAW-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
+RAW-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
+RAW-NEXT: VTable Results:
+RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
+RAW-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
+RAW-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+RAW-NEXT: Instrumentation level: IR entry_first = 0
+RAW-NEXT: Functions shown: 1
+RAW-NEXT: Total functions: 6
+RAW-NEXT: Maximum function count: 1000
+RAW-NEXT: Maximum internal block count: 250
+RAW-NEXT: Statistics for indirect call sites profile:
+RAW-NEXT: Total number of sites: 2
+RAW-NEXT: Total number of sites with values: 2
+RAW-NEXT: Total number of profiled values: 4
+RAW-NEXT: Value sites histogram:
+RAW-NEXT: NumTargets, SiteCount
+RAW-NEXT: 2, 2
+RAW-NEXT: Statistics for vtable profile:
+RAW-NEXT: Total number of sites: 2
+RAW-NEXT: Total number of sites with values: 2
+RAW-NEXT: Total number of profiled values: 4
+RAW-NEXT: Value sites histogram:
+RAW-NEXT: NumTargets, SiteCount
+RAW-NEXT: 2, 2
+
+
+INDEXED: Counters:
+INDEXED-NEXT: main:
+INDEXED-NEXT: Hash: 0x0f9a16fe6d398548
+INDEXED-NEXT: Counters: 2
+INDEXED-NEXT: Indirect Call Site Count: 2
+INDEXED-NEXT: Number of instrumented vtables: 2
+INDEXED-NEXT: Indirect Target Results:
+INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%)
+INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%)
+INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%)
+INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%)
+INDEXED-NEXT: VTable Results:
+INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%)
+INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%)
+INDEXED-NEXT: Instrumentation level: IR entry_first = 0
+INDEXED-NEXT: Functions shown: 1
+INDEXED-NEXT: Total functions: 6
+INDEXED-NEXT: Maximum function count: 1000
+INDEXED-NEXT: Maximum internal block count: 250
+INDEXED-NEXT: Statistics for indirect call sites profile:
+INDEXED-NEXT: Total number of sites: 2
+INDEXED-NEXT: Total number of sites with values: 2
+INDEXED-NEXT: Total number of profiled values: 4
+INDEXED-NEXT: Value sites histogram:
+INDEXED-NEXT: NumTargets, SiteCount
+INDEXED-NEXT: 2, 2
+INDEXED-NEXT: Statistics for vtable profile:
+INDEXED-NEXT: Total number of sites: 2
+INDEXED-NEXT: Total number of sites with values: 2
+INDEXED-NEXT: Total number of profiled values: 4
+INDEXED-NEXT: Value sites histogram:
+INDEXED-NEXT: NumTargets, SiteCount
+INDEXED-NEXT: 2, 2
+
+ICTEXT: :ir
+ICTEXT: main
+ICTEXT: # Func Hash:
+ICTEXT: 1124236338992350536
+ICTEXT: # Num Counters:
+ICTEXT: 2
+ICTEXT: # Counter Values:
+ICTEXT: 1000
+ICTEXT: 1
+ICTEXT: # Num Value Kinds:
+ICTEXT: 2
+ICTEXT: # ValueKind = IPVK_IndirectCallTarget:
+ICTEXT: 0
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750
+ICTEXT: _ZN8Derived15func1Eii:250
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750
+ICTEXT: _ZN8Derived15func2Eii:250
+ICTEXT: # ValueKind = IPVK_VTableTarget:
+ICTEXT: 2
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+ICTEXT: _ZTV8Derived1:250
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+ICTEXT: _ZTV8Derived1:250
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext
new file mode 100644
index 00000000000000..38073916ec445b
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext
@@ -0,0 +1,16 @@
+# RUN: llvm-profdata show --function=main --show-vtables %p/Inputs/vtable-value-prof.proftext | FileCheck %s
+
+# CHECK: Counters:
+# CHECK: main:
+# CHECK: Hash: 0x0f9a16fe6d398548
+# CHECK: Counters: 2
+# CHECK: VTable Results:
+# CHECK: [ 0, /path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+# CHECK: [ 0, _ZTV8Derived1, 250 ] (25.00%)
+# CHECK: [ 1, /path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%)
+# CHECK: [ 1, _ZTV8Derived1, 250 ] (25.00%)
+# CHECK: Instrumentation level: IR entry_first = 0
+# CHECK: Functions shown: 1
+# CHECK: Total functions: 6
+# CHECK: Maximum function count: 1000
+# CHECK: Maximum internal block count: 250
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 7754ca36125720..9fb56b8e2647e0 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -345,6 +345,9 @@ cl::opt<bool> ShowIndirectCallTargets(
"ic-targets", cl::init(false),
cl::desc("Show indirect call site target values for shown functions"),
cl::sub(ShowSubcommand));
+cl::opt<bool> ShowVTables("show-vtables", cl::init(false),
+ cl::desc("Show vtable names for shown functions"),
+ cl::sub(ShowSubcommand));
cl::opt<bool> ShowMemOPSizes(
"memop-sizes", cl::init(false),
cl::desc("Show the profiled sizes of the memory intrinsic calls "
@@ -722,6 +725,13 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
});
}
+  // Register every vtable name from the reader's symbol table with the
+  // writer.
+  const InstrProfSymtab &Symtab = Reader->getSymtab();
+  const auto &VTableNames = Symtab.getVTableNames();
+  for (const auto &KV : VTableNames)
+    WC->Writer.addVTableName(KV.getKey());
+
if (Reader->hasTemporalProfile()) {
auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
if (!Traces.empty())
@@ -1353,8 +1363,8 @@ remapSamples(const sampleprof::FunctionSamples &Samples,
BodySample.second.getSamples());
for (const auto &Target : BodySample.second.getCallTargets()) {
Result.addCalledTargetSamples(BodySample.first.LineOffset,
- MaskedDiscriminator,
- Remapper(Target.first), Target.second);
+ MaskedDiscriminator, Remapper(Target.first),
+ Target.second);
}
}
for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
@@ -2817,6 +2827,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
OS << " Indirect Call Site Count: "
<< Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
+ if (ShowVTables)
+ OS << " Number of instrumented vtables: "
+ << Func.getNumValueSites(IPVK_VTableTarget) << "\n";
+
uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
if (ShowMemOPSizes && NumMemOPCalls > 0)
OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
@@ -2838,6 +2852,13 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
&(Reader->getSymtab()));
}
+ if (ShowVTables) {
+ OS << " VTable Results:\n";
+ traverseAllValueSites(Func, IPVK_VTableTarget,
+ VPStats[IPVK_VTableTarget], OS,
+ &(Reader->getSymtab()));
+ }
+
if (ShowMemOPSizes && NumMemOPCalls > 0) {
OS << " Memory Intrinsic Size Results:\n";
traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
@@ -2886,6 +2907,11 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
VPStats[IPVK_IndirectCallTarget]);
}
+ if (ShownFunctions && ShowVTables) {
+ OS << "Statistics for vtable profile:\n";
+ showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]);
+ }
+
if (ShownFunctions && ShowMemOPSizes) {
OS << "Statistics for memory intrinsic calls sizes profile:\n";
showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 8ffb68de7a2d20..b007a374c2cf2c 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -638,32 +638,78 @@ TEST_F(InstrProfTest, test_irpgo_read_deprecated_names) {
Succeeded());
}
+// callee1 to callee6 are from vtable1 to vtable6 respectively.
static const char callee1[] = "callee1";
static const char callee2[] = "callee2";
static const char callee3[] = "callee3";
static const char callee4[] = "callee4";
static const char callee5[] = "callee5";
static const char callee6[] = "callee6";
+// callee7 and callee8 are not from any vtables.
+static const char callee7[] = "callee7";
+static const char callee8[] = "callee8";
+// 'callee' is primarily used to create multiple-element vtables.
+static const char callee[] = "callee";
+static const uint64_t vtable1[] = {uint64_t(callee), uint64_t(callee1)};
+static const uint64_t vtable2[] = {uint64_t(callee2), uint64_t(callee)};
+static const uint64_t vtable3[] = {
+ uint64_t(callee),
+ uint64_t(callee3),
+};
+static const uint64_t vtable4[] = {uint64_t(callee4), uint64_t(callee)};
+static const uint64_t vtable5[] = {uint64_t(callee5), uint64_t(callee)};
+static const uint64_t vtable6[] = {uint64_t(callee6), uint64_t(callee)};
+
+// Returns the address of the vtable slot that holds the callee with a numbered
+// suffix. Note this is an address inside the vtable, not the callee's own.
+static uint64_t getCalleeAddress(const uint64_t *vtableAddr) {
+  // The numbered callee is the 2nd element of vtable1 and vtable3, and the 1st
+  // element of every other vtable.
+  const bool IsSecondSlot = vtableAddr == vtable1 || vtableAddr == vtable3;
+  // Step by whole slots with pointer arithmetic rather than a hard-coded byte
+  // offset of 8, so the offset always equals the element size.
+  const uint64_t *SlotAddr = IsSecondSlot ? vtableAddr + 1 : vtableAddr;
+  return uint64_t(SlotAddr);
+}
-TEST_P(InstrProfReaderWriterTest, icall_data_read_write) {
+TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) {
NamedInstrProfRecord Record1("caller", 0x1234, {1, 2});
- // 4 value sites.
- Record1.reserveSites(IPVK_IndirectCallTarget, 4);
- InstrProfValueData VD0[] = {
- {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}};
- Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
- // No value profile data at the second site.
- Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
- InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}};
- Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
- InstrProfValueData VD3[] = {{(uint64_t)callee1, 1}};
- Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+ // 4 indirect call value sites.
+ {
+ Record1.reserveSites(IPVK_IndirectCallTarget, 4);
+ InstrProfValueData VD0[] = {
+ {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
+ // No value profile data at the second site.
+ Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+ InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
+ InstrProfValueData VD3[] = {{(uint64_t)callee7, 1}, {(uint64_t)callee8, 2}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
+ }
+
+  // 2 vtable value sites.
+  {
+    Record1.reserveSites(IPVK_VTableTarget, 2);
+    InstrProfValueData VD0[] = {
+        {getCalleeAddress(vtable1), 1},
+        {getCalleeAddress(vtable2), 2},
+        {getCalleeAddress(vtable3), 3},
+    };
+    Record1.addValueData(IPVK_VTableTarget, 0, VD0, 3, nullptr);
+    // The second site is index 1, the index the checks further down read.
+    InstrProfValueData VD1[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2}};
+    Record1.addValueData(IPVK_VTableTarget, 1, VD1, 2, nullptr);
+  }
Writer.addRecord(std::move(Record1), getProfWeight(), Err);
Writer.addRecord({"callee1", 0x1235, {3, 4}}, Err);
Writer.addRecord({"callee2", 0x1235, {3, 4}}, Err);
Writer.addRecord({"callee3", 0x1235, {3, 4}}, Err);
+ Writer.addRecord({"callee7", 0x1235, {3, 4}}, Err);
+ Writer.addRecord({"callee8", 0x1235, {3, 4}}, Err);
// Set writer value prof data endianness.
Writer.setValueProfDataEndianness(getEndianness());
@@ -676,24 +722,66 @@ TEST_P(InstrProfReaderWriterTest, icall_data_read_write) {
Expected<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
ASSERT_THAT_ERROR(R.takeError(), Succeeded());
+
+ // Test the number of instrumented indirect call sites and the number of
+ // profiled values at each site.
ASSERT_EQ(4U, R->getNumValueSites(IPVK_IndirectCallTarget));
EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
EXPECT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
- EXPECT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+ EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+
+ // Test the number of instrumented vtable sites and the number of profiled
+ // values at each site.
+ ASSERT_EQ(2U, R->getNumValueSites(IPVK_VTableTarget));
+ EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_VTableTarget, 0));
+ EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_VTableTarget, 1));
+
+ // First indirect site.
+ {
+ uint64_t TotalC;
+ std::unique_ptr<InstrProfValueData[]> VD =
+ R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC);
+
+ EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
+ EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
+ EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
+ EXPECT_EQ(6U * getProfWeight(), TotalC);
+
+ EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+ }
- uint64_t TotalC;
- std::unique_ptr<InstrProfValueData[]> VD =
- R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC);
+ // First vtable site.
+ {
+ uint64_t TotalC;
+ std::unique_ptr<InstrProfValueData[]> VD =
+ R->getValueForSite(IPVK_VTableTarget, 0, &TotalC);
+
+ EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
+ EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
+ EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
+ EXPECT_EQ(6U * getProfWeight(), TotalC);
- EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
- EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
- EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
- EXPECT_EQ(6U * getProfWeight(), TotalC);
+ EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD[2].Value, getCalleeAddress(vtable1));
+ }
- EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
- EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
- EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+ // Second vtable site.
+ {
+ uint64_t TotalC;
+ std::unique_ptr<InstrProfValueData[]> VD =
+ R->getValueForSite(IPVK_VTableTarget, 1, &TotalC);
+
+ EXPECT_EQ(2U * getProfWeight(), VD[0].Count);
+ EXPECT_EQ(1U * getProfWeight(), VD[1].Count);
+ EXPECT_EQ(3U * getProfWeight(), TotalC);
+
+ EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable1));
+ }
}
INSTANTIATE_TEST_SUITE_P(
@@ -801,33 +889,53 @@ TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) {
ASSERT_EQ(1U, ValueData[3].Count);
}
-TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
+TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) {
static const char caller[] = "caller";
NamedInstrProfRecord Record11(caller, 0x1234, {1, 2});
NamedInstrProfRecord Record12(caller, 0x1234, {1, 2});
- // 5 value sites.
- Record11.reserveSites(IPVK_IndirectCallTarget, 5);
- InstrProfValueData VD0[] = {{uint64_t(callee1), 1},
- {uint64_t(callee2), 2},
- {uint64_t(callee3), 3},
- {uint64_t(callee4), 4}};
- Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
+ // 5 value sites for indirect calls.
+ {
+ Record11.reserveSites(IPVK_IndirectCallTarget, 5);
+ InstrProfValueData VD0[] = {{uint64_t(callee1), 1},
+ {uint64_t(callee2), 2},
+ {uint64_t(callee3), 3},
+ {uint64_t(callee4), 4}};
+ Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
- // No value profile data at the second site.
- Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+ // No value profile data at the second site.
+ Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
- InstrProfValueData VD2[] = {
- {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
- Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
+ InstrProfValueData VD2[] = {
+ {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
+ Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
- InstrProfValueData VD3[] = {{uint64_t(callee1), 1}};
- Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+ InstrProfValueData VD3[] = {{uint64_t(callee7), 1}, {uint64_t(callee8), 2}};
+ Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
- InstrProfValueData VD4[] = {{uint64_t(callee1), 1},
- {uint64_t(callee2), 2},
- {uint64_t(callee3), 3}};
- Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr);
+ InstrProfValueData VD4[] = {
+ {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
+ Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr);
+ }
+  // 3 value sites for vtables, at site indices 0, 1 and 2.
+  {
+    Record11.reserveSites(IPVK_VTableTarget, 3);
+    InstrProfValueData VD0[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3},
+                                {getCalleeAddress(vtable4), 4}};
+    Record11.addValueData(IPVK_VTableTarget, 0, VD0, 4, nullptr);
+
+    InstrProfValueData VD1[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record11.addValueData(IPVK_VTableTarget, 1, VD1, 3, nullptr);
+
+    InstrProfValueData VD2[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record11.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr);
+  }
// A different record for the same caller.
Record12.reserveSites(IPVK_IndirectCallTarget, 5);
@@ -843,11 +951,28 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr);
- InstrProfValueData VD42[] = {{uint64_t(callee1), 1},
- {uint64_t(callee2), 2},
- {uint64_t(callee3), 3}};
+ InstrProfValueData VD42[] = {
+ {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
Record12.addValueData(IPVK_IndirectCallTarget, 4, VD42, 3, nullptr);
+  // 3 value sites for vtables, at site indices 0, 1 and 2.
+  {
+    Record12.reserveSites(IPVK_VTableTarget, 3);
+    InstrProfValueData VD0[] = {{getCalleeAddress(vtable2), 5},
+                                {getCalleeAddress(vtable3), 3}};
+    Record12.addValueData(IPVK_VTableTarget, 0, VD0, 2, nullptr);
+
+    InstrProfValueData VD1[] = {{getCalleeAddress(vtable2), 1},
+                                {getCalleeAddress(vtable3), 3},
+                                {getCalleeAddress(vtable4), 4}};
+    Record12.addValueData(IPVK_VTableTarget, 1, VD1, 3, nullptr);
+
+    InstrProfValueData VD2[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record12.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr);
+  }
+
Writer.addRecord(std::move(Record11), Err);
// Merge profile data.
Writer.addRecord(std::move(Record12), Err);
@@ -857,53 +982,99 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
Writer.addRecord({callee3, 0x1235, {3, 4}}, Err);
Writer.addRecord({callee3, 0x1235, {3, 4}}, Err);
Writer.addRecord({callee4, 0x1235, {3, 5}}, Err);
+ Writer.addRecord({callee7, 0x1235, {3, 5}}, Err);
+ Writer.addRecord({callee8, 0x1235, {3, 5}}, Err);
auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
+ // Test the number of instrumented value sites and the number of profiled
+ // values for each site.
Expected<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
EXPECT_THAT_ERROR(R.takeError(), Succeeded());
+ // For indirect calls.
ASSERT_EQ(5U, R->getNumValueSites(IPVK_IndirectCallTarget));
ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
ASSERT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
- ASSERT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+ ASSERT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 4));
+ // For vtables.
+ ASSERT_EQ(3U, R->getNumValueSites(IPVK_VTableTarget));
+ ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_VTableTarget, 0));
+ ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_VTableTarget, 1));
+ ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_VTableTarget, 2));
+
+ // Test the merged values for indirect calls.
+ {
+ std::unique_ptr<InstrProfValueData[]> VD =
+ R->getValueForSite(IPVK_IndirectCallTarget, 0);
+ EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(7U, VD[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(6U, VD[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4"));
+ EXPECT_EQ(4U, VD[2].Count);
+ EXPECT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1"));
+ EXPECT_EQ(1U, VD[3].Count);
+
+ std::unique_ptr<InstrProfValueData[]> VD_2(
+ R->getValueForSite(IPVK_IndirectCallTarget, 2));
+ EXPECT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(6U, VD_2[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4"));
+ EXPECT_EQ(4U, VD_2[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(3U, VD_2[2].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1"));
+ EXPECT_EQ(1U, VD_2[3].Count);
+
+ std::unique_ptr<InstrProfValueData[]> VD_3(
+ R->getValueForSite(IPVK_IndirectCallTarget, 3));
+ EXPECT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee8"));
+ EXPECT_EQ(2U, VD_3[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee7"));
+ EXPECT_EQ(1U, VD_3[1].Count);
+
+ std::unique_ptr<InstrProfValueData[]> VD_4(
+ R->getValueForSite(IPVK_IndirectCallTarget, 4));
+ EXPECT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(6U, VD_4[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(4U, VD_4[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1"));
+ EXPECT_EQ(2U, VD_4[2].Count);
+ }
- std::unique_ptr<InstrProfValueData[]> VD =
- R->getValueForSite(IPVK_IndirectCallTarget, 0);
- ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2"));
- ASSERT_EQ(7U, VD[0].Count);
- ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3"));
- ASSERT_EQ(6U, VD[1].Count);
- ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4"));
- ASSERT_EQ(4U, VD[2].Count);
- ASSERT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1"));
- ASSERT_EQ(1U, VD[3].Count);
-
- std::unique_ptr<InstrProfValueData[]> VD_2(
- R->getValueForSite(IPVK_IndirectCallTarget, 2));
- ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3"));
- ASSERT_EQ(6U, VD_2[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4"));
- ASSERT_EQ(4U, VD_2[1].Count);
- ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2"));
- ASSERT_EQ(3U, VD_2[2].Count);
- ASSERT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1"));
- ASSERT_EQ(1U, VD_2[3].Count);
-
- std::unique_ptr<InstrProfValueData[]> VD_3(
- R->getValueForSite(IPVK_IndirectCallTarget, 3));
- ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee1"));
- ASSERT_EQ(1U, VD_3[0].Count);
-
- std::unique_ptr<InstrProfValueData[]> VD_4(
- R->getValueForSite(IPVK_IndirectCallTarget, 4));
- ASSERT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3"));
- ASSERT_EQ(6U, VD_4[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2"));
- ASSERT_EQ(4U, VD_4[1].Count);
- ASSERT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1"));
- ASSERT_EQ(2U, VD_4[2].Count);
+ // Test the merged values for vtables
+ {
+ auto VD0 = R->getValueForSite(IPVK_VTableTarget, 0);
+ EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD0[0].Count, 7U);
+ EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD0[1].Count, 6U);
+ EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable4));
+ EXPECT_EQ(VD0[2].Count, 4U);
+ EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable1));
+ EXPECT_EQ(VD0[3].Count, 1U);
+
+ auto VD1 = R->getValueForSite(IPVK_VTableTarget, 1);
+ EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD1[0].Count, 6U);
+ EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable4));
+ EXPECT_EQ(VD1[1].Count, 4U);
+ EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD1[2].Count, 3U);
+ EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable1));
+ EXPECT_EQ(VD1[3].Count, 1U);
+
+ auto VD2 = R->getValueForSite(IPVK_VTableTarget, 2);
+ EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD2[0].Count, 6U);
+ EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD2[1].Count, 4U);
+ EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable1));
+ EXPECT_EQ(VD2[2].Count, 2U);
+ }
}
struct ValueProfileMergeEdgeCaseTest
@@ -1027,30 +1198,62 @@ INSTANTIATE_TEST_SUITE_P(
EdgeCaseTest, ValueProfileMergeEdgeCaseTest,
::testing::Combine(::testing::Bool(), /* Sparse */
::testing::Values(IPVK_IndirectCallTarget,
- IPVK_MemOPSize) /* ValueKind */
+ IPVK_MemOPSize,
+ IPVK_VTableTarget) /* ValueKind */
));
static void addValueProfData(InstrProfRecord &Record) {
- Record.reserveSites(IPVK_IndirectCallTarget, 5);
- InstrProfValueData VD0[] = {{uint64_t(callee1), 400},
- {uint64_t(callee2), 1000},
- {uint64_t(callee3), 500},
- {uint64_t(callee4), 300},
- {uint64_t(callee5), 100}};
- Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr);
- InstrProfValueData VD1[] = {{uint64_t(callee5), 800},
- {uint64_t(callee3), 1000},
- {uint64_t(callee2), 2500},
- {uint64_t(callee1), 1300}};
- Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr);
- InstrProfValueData VD2[] = {{uint64_t(callee6), 800},
- {uint64_t(callee3), 1000},
- {uint64_t(callee4), 5500}};
- Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
- InstrProfValueData VD3[] = {{uint64_t(callee2), 1800},
- {uint64_t(callee3), 2000}};
- Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
- Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr);
+ // Add test data for indirect calls.
+ {
+ Record.reserveSites(IPVK_IndirectCallTarget, 6);
+ InstrProfValueData VD0[] = {{uint64_t(callee1), 400},
+ {uint64_t(callee2), 1000},
+ {uint64_t(callee3), 500},
+ {uint64_t(callee4), 300},
+ {uint64_t(callee5), 100}};
+ Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr);
+ InstrProfValueData VD1[] = {{uint64_t(callee5), 800},
+ {uint64_t(callee3), 1000},
+ {uint64_t(callee2), 2500},
+ {uint64_t(callee1), 1300}};
+ Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr);
+ InstrProfValueData VD2[] = {{uint64_t(callee6), 800},
+ {uint64_t(callee3), 1000},
+ {uint64_t(callee4), 5500}};
+ Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
+ InstrProfValueData VD3[] = {{uint64_t(callee2), 1800},
+ {uint64_t(callee3), 2000}};
+ Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
+ Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr);
+ InstrProfValueData VD5[] = {{uint64_t(callee7), 1234},
+ {uint64_t(callee8), 5678}};
+ Record.addValueData(IPVK_IndirectCallTarget, 5, VD5, 2, nullptr);
+ }
+
+ // Add test data for vtables
+ {
+ Record.reserveSites(IPVK_VTableTarget, 4);
+ InstrProfValueData VD0[] = {
+ {getCalleeAddress(vtable1), 400}, {getCalleeAddress(vtable2), 1000},
+ {getCalleeAddress(vtable3), 500}, {getCalleeAddress(vtable4), 300},
+ {getCalleeAddress(vtable5), 100},
+ };
+ InstrProfValueData VD1[] = {{getCalleeAddress(vtable5), 800},
+ {getCalleeAddress(vtable3), 1000},
+ {getCalleeAddress(vtable2), 2500},
+ {getCalleeAddress(vtable1), 1300}};
+ InstrProfValueData VD2[] = {
+ {getCalleeAddress(vtable6), 800},
+ {getCalleeAddress(vtable3), 1000},
+ {getCalleeAddress(vtable4), 5500},
+ };
+ InstrProfValueData VD3[] = {{getCalleeAddress(vtable2), 1800},
+ {getCalleeAddress(vtable3), 2000}};
+ Record.addValueData(IPVK_VTableTarget, 0, VD0, 5, nullptr);
+ Record.addValueData(IPVK_VTableTarget, 1, VD1, 4, nullptr);
+ Record.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr);
+ Record.addValueData(IPVK_VTableTarget, 3, VD3, 2, nullptr);
+ }
}
TEST(ValueProfileReadWriteTest, value_prof_data_read_write) {
@@ -1063,59 +1266,111 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) {
VPData->deserializeTo(Record, nullptr);
// Now read data from Record and sanity check the data
- ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget));
+ ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget));
ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
ASSERT_EQ(0U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 4));
+ ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 5));
auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
return VD1.Count > VD2.Count;
};
+
std::unique_ptr<InstrProfValueData[]> VD_0(
Record.getValueForSite(IPVK_IndirectCallTarget, 0));
llvm::sort(&VD_0[0], &VD_0[5], Cmp);
- ASSERT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2"));
- ASSERT_EQ(1000U, VD_0[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3"));
- ASSERT_EQ(500U, VD_0[1].Count);
- ASSERT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1"));
- ASSERT_EQ(400U, VD_0[2].Count);
- ASSERT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4"));
- ASSERT_EQ(300U, VD_0[3].Count);
- ASSERT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5"));
- ASSERT_EQ(100U, VD_0[4].Count);
+ EXPECT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(1000U, VD_0[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(500U, VD_0[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1"));
+ EXPECT_EQ(400U, VD_0[2].Count);
+ EXPECT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4"));
+ EXPECT_EQ(300U, VD_0[3].Count);
+ EXPECT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5"));
+ EXPECT_EQ(100U, VD_0[4].Count);
std::unique_ptr<InstrProfValueData[]> VD_1(
Record.getValueForSite(IPVK_IndirectCallTarget, 1));
llvm::sort(&VD_1[0], &VD_1[4], Cmp);
- ASSERT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2"));
- ASSERT_EQ(2500U, VD_1[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1"));
- ASSERT_EQ(1300U, VD_1[1].Count);
- ASSERT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3"));
- ASSERT_EQ(1000U, VD_1[2].Count);
- ASSERT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5"));
- ASSERT_EQ(800U, VD_1[3].Count);
+ EXPECT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(2500U, VD_1[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1"));
+ EXPECT_EQ(1300U, VD_1[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(1000U, VD_1[2].Count);
+ EXPECT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5"));
+ EXPECT_EQ(800U, VD_1[3].Count);
std::unique_ptr<InstrProfValueData[]> VD_2(
Record.getValueForSite(IPVK_IndirectCallTarget, 2));
llvm::sort(&VD_2[0], &VD_2[3], Cmp);
- ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4"));
- ASSERT_EQ(5500U, VD_2[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3"));
- ASSERT_EQ(1000U, VD_2[1].Count);
- ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6"));
- ASSERT_EQ(800U, VD_2[2].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4"));
+ EXPECT_EQ(5500U, VD_2[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(1000U, VD_2[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6"));
+ EXPECT_EQ(800U, VD_2[2].Count);
std::unique_ptr<InstrProfValueData[]> VD_3(
Record.getValueForSite(IPVK_IndirectCallTarget, 3));
llvm::sort(&VD_3[0], &VD_3[2], Cmp);
- ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3"));
- ASSERT_EQ(2000U, VD_3[0].Count);
- ASSERT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2"));
- ASSERT_EQ(1800U, VD_3[1].Count);
+ EXPECT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3"));
+ EXPECT_EQ(2000U, VD_3[0].Count);
+ EXPECT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2"));
+ EXPECT_EQ(1800U, VD_3[1].Count);
+
+ ASSERT_EQ(4U, Record.getNumValueSites(IPVK_VTableTarget));
+ ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_VTableTarget, 0));
+ ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_VTableTarget, 1));
+ ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_VTableTarget, 2));
+ ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_VTableTarget, 3));
+
+ std::unique_ptr<InstrProfValueData[]> VD0(
+ Record.getValueForSite(IPVK_VTableTarget, 0));
+ llvm::sort(&VD0[0], &VD0[5], Cmp);
+ EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD0[0].Count, 1000U);
+ EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD0[1].Count, 500U);
+ EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable1));
+ EXPECT_EQ(VD0[2].Count, 400U);
+ EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable4));
+ EXPECT_EQ(VD0[3].Count, 300U);
+ EXPECT_EQ(VD0[4].Value, getCalleeAddress(vtable5));
+ EXPECT_EQ(VD0[4].Count, 100U);
+
+ std::unique_ptr<InstrProfValueData[]> VD1(
+ Record.getValueForSite(IPVK_VTableTarget, 1));
+ llvm::sort(&VD1[0], &VD1[4], Cmp);
+ EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD1[0].Count, 2500U);
+ EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable1));
+ EXPECT_EQ(VD1[1].Count, 1300U);
+ EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD1[2].Count, 1000U);
+ EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable5));
+ EXPECT_EQ(VD1[3].Count, 800U);
+
+ std::unique_ptr<InstrProfValueData[]> VD2(
+ Record.getValueForSite(IPVK_VTableTarget, 2));
+ llvm::sort(&VD2[0], &VD2[3], Cmp);
+ EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4));
+ EXPECT_EQ(VD2[0].Count, 5500U);
+ EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD2[1].Count, 1000U);
+ EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable6));
+ EXPECT_EQ(VD2[2].Count, 800U);
+
+ std::unique_ptr<InstrProfValueData[]> VD3(
+ Record.getValueForSite(IPVK_VTableTarget, 3));
+ llvm::sort(&VD3[0], &VD3[2], Cmp);
+ EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3));
+ EXPECT_EQ(VD3[0].Count, 2000U);
+ EXPECT_EQ(VD3[1].Value, getCalleeAddress(vtable2));
+ EXPECT_EQ(VD3[1].Count, 1800U);
}
TEST(ValueProfileReadWriteTest, symtab_mapping) {
@@ -1132,10 +1387,27 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) {
Symtab.mapAddress(uint64_t(callee4), 0x4000ULL);
// Missing mapping for callee5
+ auto getVTableStartAddr = [](const uint64_t *vtable) -> uint64_t {
+ return uint64_t(vtable);
+ };
+ auto getVTableEndAddr = [](const uint64_t *vtable) -> uint64_t {
+ return uint64_t(vtable) + 16;
+ };
+ // vtable1, vtable2, vtable3, vtable4 get mapped; vtable5, vtable6 are not
+ // mapped.
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable1),
+ getVTableEndAddr(vtable1), MD5Hash("vtable1"));
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable2),
+ getVTableEndAddr(vtable2), MD5Hash("vtable2"));
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable3),
+ getVTableEndAddr(vtable3), MD5Hash("vtable3"));
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable4),
+ getVTableEndAddr(vtable4), MD5Hash("vtable4"));
+
VPData->deserializeTo(Record, &Symtab);
// Now read data from Record and sanity check the data
- ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget));
+ ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget));
ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
@@ -1153,6 +1425,74 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) {
// callee5 does not have a mapped value -- default to 0.
ASSERT_EQ(VD_0[4].Value, 0ULL);
+
+ // Sanity check the vtable value data
+ ASSERT_EQ(4U, Record.getNumValueSites(IPVK_VTableTarget));
+
+ {
+ // The first vtable site.
+ std::unique_ptr<InstrProfValueData[]> VD(
+ Record.getValueForSite(IPVK_VTableTarget, 0));
+ ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_VTableTarget, 0));
+ llvm::sort(&VD[0], &VD[5], Cmp);
+ EXPECT_EQ(1000U, VD[0].Count);
+ EXPECT_EQ(VD[0].Value, MD5Hash("vtable2"));
+ EXPECT_EQ(500U, VD[1].Count);
+ EXPECT_EQ(VD[1].Value, MD5Hash("vtable3"));
+ EXPECT_EQ(VD[2].Value, MD5Hash("vtable1"));
+ EXPECT_EQ(400U, VD[2].Count);
+ EXPECT_EQ(VD[3].Value, MD5Hash("vtable4"));
+ EXPECT_EQ(300U, VD[3].Count);
+
+ // vtable5 isn't mapped -- default to 0.
+ EXPECT_EQ(VD[4].Value, 0U);
+ EXPECT_EQ(VD[4].Count, 100U);
+ }
+
+ {
+ // The second vtable site.
+ std::unique_ptr<InstrProfValueData[]> VD(
+ Record.getValueForSite(IPVK_VTableTarget, 1));
+ ASSERT_EQ(4, Record.getNumValueDataForSite(IPVK_VTableTarget, 1));
+ llvm::sort(&VD[0], &VD[4], Cmp);
+ EXPECT_EQ(VD[0].Value, MD5Hash("vtable2"));
+ EXPECT_EQ(2500U, VD[0].Count);
+ EXPECT_EQ(VD[1].Value, MD5Hash("vtable1"));
+ EXPECT_EQ(1300U, VD[1].Count);
+
+ EXPECT_EQ(VD[2].Value, MD5Hash("vtable3"));
+ EXPECT_EQ(1000U, VD[2].Count);
+ // vtable5 isn't mapped -- default to 0.
+ EXPECT_EQ(VD[3].Value, 0U);
+ EXPECT_EQ(800U, VD[3].Count);
+ }
+
+ {
+ // The third vtable site.
+ std::unique_ptr<InstrProfValueData[]> VD(
+ Record.getValueForSite(IPVK_VTableTarget, 2));
+ ASSERT_EQ(3, Record.getNumValueDataForSite(IPVK_VTableTarget, 2));
+ llvm::sort(&VD[0], &VD[3], Cmp);
+ EXPECT_EQ(5500U, VD[0].Count);
+ EXPECT_EQ(VD[0].Value, MD5Hash("vtable4"));
+ EXPECT_EQ(1000U, VD[1].Count);
+ EXPECT_EQ(VD[1].Value, MD5Hash("vtable3"));
+ // vtable6 isn't mapped -- default to 0.
+ EXPECT_EQ(VD[2].Value, 0U);
+ EXPECT_EQ(800U, VD[2].Count);
+ }
+
+ {
+ // The fourth vtable site.
+ std::unique_ptr<InstrProfValueData[]> VD(
+ Record.getValueForSite(IPVK_VTableTarget, 3));
+ ASSERT_EQ(2, Record.getNumValueDataForSite(IPVK_VTableTarget, 3));
+ llvm::sort(&VD[0], &VD[2], Cmp);
+ EXPECT_EQ(2000U, VD[0].Count);
+ EXPECT_EQ(VD[0].Value, MD5Hash("vtable3"));
+ EXPECT_EQ(1800U, VD[1].Count);
+ EXPECT_EQ(VD[1].Value, MD5Hash("vtable2"));
+ }
}
TEST_P(MaybeSparseInstrProfTest, get_max_function_count) {
>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 09:07:27 -0800
Subject: [PATCH 2/5] [InstrProf] Add vtables with type metadata into symtab to
look it up with GUID
---
llvm/include/llvm/ProfileData/InstrProf.h | 19 +++++
llvm/lib/ProfileData/InstrProf.cpp | 87 ++++++++++++++------
llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++
3 files changed, 138 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 53108a093bf4dd..6e799cf8aa273e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -487,8 +487,25 @@ class InstrProfSymtab {
return "** External Symbol **";
}
+ // Returns the canonical name of the given PGOName by stripping the name
+ // suffixes that begin with ".". If MayHaveUniqueSuffix is true, the
+ // ".__uniq." suffix is kept in the canonical name.
+ StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+ // Add the function into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOFuncName), PGOFuncName>
+ // - <MD5Hash(PGOFuncName), F>
+ // - <MD5Hash(getCanonicalName(PGOFuncName)), F>
Error addFuncWithName(Function &F, StringRef PGOFuncName);
+ // Add the vtable into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOVTableName), PGOVTableName>
+ // - <MD5Hash(PGOVTableName), V>
+ // - <MD5Hash(getCanonicalName(PGOVTableName)), V>
+ Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
// If the symtab is created by a series of calls to \c addFuncName, \c
// finalizeSymtab needs to be called before looking up function names.
// This is required because the underlying map is a vector (for space
@@ -543,6 +560,7 @@ class InstrProfSymtab {
Error create(const FuncNameIterRange &FuncIterRange,
const VTableNameIterRange &VTableIterRange);
+ // Map the MD5 of the symbol name to the name.
Error addSymbolName(StringRef SymbolName) {
if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5VTableMap, less_first());
llvm::sort(MD5FuncMap, less_first());
llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9ebcba10c860ff..a09a2bb0ba77cb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
Types.clear();
G.getMetadata(LLVMContext::MD_type, Types);
if (!Types.empty()) {
- MD5VTableMap.emplace_back(G.getGUID(), &G);
+ if (Error E = addVTableWithName(
+ G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ return E;
}
}
Sorted = false;
@@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
return Error::success();
}
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+ StringRef VTablePGOName) {
+ if (Error E = addVTableName(VTablePGOName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+ // NOTE: `-funique-internal-linkage-names` doesn't uniquify vtables, so no
+ // need to check ".__uniq.".
+
+ // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+ // it will have `.llvm.` in its name. Use the name before externalization.
+ StringRef CanonicalName =
+ getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+ if (CanonicalName != VTablePGOName) {
+ if (Error E = addVTableName(CanonicalName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+ }
+
+ return Error::success();
+}
+
/// \c NameStrings is a string composed of one of more possibly encoded
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
/// method decodes the string and calls `NameCallback` for each substring.
@@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
}
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
- if (Error E = addFuncName(PGOFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+ bool MayHaveUniqueSuffix) {
+ size_t pos = 0;
// In ThinLTO, local function may have been promoted to global and have
// suffix ".llvm." added to the function name. We need to add the
// stripped function name to the symbol table so that we can find a match
// from profile.
//
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
- // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
- // search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- StringRef OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
+ // The ".__uniq." suffix is used to differentiate internal linkage functions
+ // in different modules and should be kept. Currently this is the only suffix
+ // with the pattern ".xxx" that is kept before matching; other suffixes, such
+ // as ".llvm.", will be stripped.
+ if (MayHaveUniqueSuffix) {
+ const std::string UniqSuffix = ".__uniq.";
+ pos = PGOName.find(UniqSuffix);
+ if (pos != StringRef::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ }
+
+ // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from
+ // the beginning.
+ pos = PGOName.find('.', pos);
+ if (pos != StringRef::npos && pos != 0)
+ return PGOName.substr(0, pos);
+
+ return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+ StringRef CanonicalName =
+ getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+ if (CanonicalName != PGOFuncName) {
+ if (Error E = addFuncName(CanonicalName))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
}
+
return Error::success();
}
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 4b99195c1b859a..edde544660e454 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
+ // [ptr, ptr, ptr]
+ ArrayType *VTableArrayType = ArrayType::get(
+ PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+ 3);
+ Constant *Int32TyNull =
+ llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+ SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
+ StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+ // Create a vtable definition with external linkage.
+ GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInExternalGV", M.get())})}),
+ "ExternalGV");
+
+ // Create a vtable definition with local (internal) linkage.
+ GlobalVariable *LocalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType,
+ {llvm::ConstantArray::get(
+ VTableArrayType, {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInLocalGV", M.get())})}),
+ "LocalGV");
+
+ // Add type metadata for the test data, since vtables with type metadata are
+ // added to symtab.
+ ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+ LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV"));
+
InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
@@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
EXPECT_EQ(StringRef(PGOName), PGOFuncName);
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
}
+
+ StringRef VTables[] = {"ExternalGV", "LocalGV"};
+ for (StringRef VTableName : VTables) {
+ GlobalVariable *GV =
+ M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
+
+ // Test that ProfSymtab returns the expected name given a hash.
+ std::string IRPGOName = getPGOName(*GV);
+ uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
+ EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
+ EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
+
+ // Test that ProfSymtab returns the expected global variable
+ EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
+ }
}
// Testing symtab serialization and creator/deserialization interface
>From 7ebae253ab1808bca328453f68af2b595d07176e Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 11:32:50 -0800
Subject: [PATCH 3/5] [NFC][CallPromotionUtils]Extract a helper function
versionCallSiteWithCond from versionCallSite
---
.../Transforms/Utils/CallPromotionUtils.cpp | 36 +++++++++++--------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 4e84927f1cfc90..d0cf0792eface0 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -188,10 +188,9 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// Predicate and clone the given call site.
///
/// This function creates an if-then-else structure at the location of the call
-/// site. The "if" condition compares the call site's called value to the given
-/// callee. The original call site is moved into the "else" block, and a clone
-/// of the call site is placed in the "then" block. The cloned instruction is
-/// returned.
+/// site. The "if" condition is specified by `Cond`.
+/// The original call site is moved into the "else" block, and a clone of the
+/// call site is placed in the "then" block. The cloned instruction is returned.
///
/// For example, the call instruction below:
///
@@ -202,7 +201,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// Is replace by the following:
///
/// orig_bb:
-/// %cond = icmp eq i32 ()* %ptr, @func
/// br i1 %cond, %then_bb, %else_bb
///
/// then_bb:
@@ -232,7 +230,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// Is replace by the following:
///
/// orig_bb:
-/// %cond = icmp eq i32 ()* %ptr, @func
/// br i1 %cond, %then_bb, %else_bb
///
/// then_bb:
@@ -267,7 +264,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// Is replaced by the following:
///
/// cond_bb:
-/// %cond = icmp eq i32 ()* %ptr, @func
/// br i1 %cond, %then_bb, %orig_bb
///
/// then_bb:
@@ -280,19 +276,13 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// ; The original call instruction stays in its original block.
/// %t0 = musttail call i32 %ptr()
/// ret %t0
-CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
- MDNode *BranchWeights) {
+static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
+ MDNode *BranchWeights) {
IRBuilder<> Builder(&CB);
CallBase *OrigInst = &CB;
BasicBlock *OrigBlock = OrigInst->getParent();
- // Create the compare. The called value and callee must have the same type to
- // be compared.
- if (CB.getCalledOperand()->getType() != Callee->getType())
- Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
- auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
-
if (OrigInst->isMustTailCall()) {
// Create an if-then structure. The original instruction stays in its block,
// and a clone of the original instruction is placed in the "then" block.
@@ -380,6 +370,22 @@ CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
return *NewInst;
}
+// Predicate and clone the given call site using the condition `CB.callee ==
+// Callee`. See the comment on `versionCallSiteWithCond` for the transformation.
+CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
+ MDNode *BranchWeights) {
+
+ IRBuilder<> Builder(&CB);
+
+ // Create the compare. The called value and callee must have the same type to
+ // be compared.
+ if (CB.getCalledOperand()->getType() != Callee->getType())
+ Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
+ auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
+
+ return versionCallSiteWithCond(CB, Cond, BranchWeights);
+}
+
bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
const char **FailureReason) {
assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
>From ac5dc1bf77b67cbf0aa5e2c8fb6a7ce0080fb501 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sat, 10 Feb 2024 12:03:25 -0800
Subject: [PATCH 4/5] [CallPromotionUtils]Implement conditional indirect call
promotion with vtable-based comparison
---
.../Transforms/Utils/CallPromotionUtils.h | 50 ++++++-
.../Transforms/Utils/CallPromotionUtils.cpp | 64 ++++++++-
.../Utils/CallPromotionUtilsTest.cpp | 127 ++++++++++++++++++
3 files changed, 233 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
index fcb384ec361339..5f3a71206876c6 100644
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -14,10 +14,17 @@
#ifndef LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
#define LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
+#include <cstdint>
+
+#include "llvm/ADT/ArrayRef.h"
+
namespace llvm {
+class Constant;
class CallBase;
class CastInst;
class Function;
+class GlobalVariable;
+class Instruction;
class MDNode;
class Value;
@@ -41,7 +48,9 @@ bool isLegalToPromote(const CallBase &CB, Function *Callee,
CallBase &promoteCall(CallBase &CB, Function *Callee,
CastInst **RetBitCast = nullptr);
-/// Promote the given indirect call site to conditionally call \p Callee.
+/// Promote the given indirect call site to conditionally call \p Callee. The
+/// promoted direct call instruction is predicated on `CB.getCalledOperand() ==
+/// Callee`.
///
/// This function creates an if-then-else structure at the location of the call
/// site. The original call site is moved into the "else" block. A clone of the
@@ -51,6 +60,31 @@ CallBase &promoteCall(CallBase &CB, Function *Callee,
CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
MDNode *BranchWeights = nullptr);
+/// This is similar to `promoteCallWithIfThenElse` except that the condition to
+/// promote a virtual call is that \p VPtr is the same as any of \p
+/// AddressPoints.
+///
+/// This function is expected to be used on virtual calls (a subset of indirect
+/// calls). \p VPtr is the virtual table address stored in the objects, and
+/// \p AddressPoints contains address points of vtables to be compared with.
+///
+/// It's the responsibility of the caller to guarantee the correctness of the
+/// transformation (by specifying \p VPtr and \p AddressPoints properly).
+///
+/// This function doesn't sink the address-calculation instructions of indirect
+/// callee to the indirect call fallback. The subsequent passes (e.g.
+/// inst-combine) should sink them if possible and handle the sink of debug
+/// intrinsics together.
+CallBase &promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr,
+ Function *Callee,
+ ArrayRef<Constant *> AddressPoints,
+ MDNode *BranchWeights);
+
+/// Returns a constant representing the vtable's address point specified by the
+/// offset. Caller should ensure \p AddressPointOffset is valid.
+Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+ uint32_t AddressPointOffset);
+
/// Try to promote (devirtualize) a virtual call on an Alloca. Return true on
/// success.
///
@@ -74,13 +108,17 @@ CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
///
bool tryPromoteCall(CallBase &CB);
+/// Predicate and clone the given call site using the given condition.
+CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
+ MDNode *BranchWeights);
+
/// Predicate and clone the given call site.
///
-/// This function creates an if-then-else structure at the location of the call
-/// site. The "if" condition compares the call site's called value to the given
-/// callee. The original call site is moved into the "else" block, and a clone
-/// of the call site is placed in the "then" block. The cloned instruction is
-/// returned.
+/// This function creates an if-then-else structure at the location of the
+/// call site. The "if" condition compares the call site's called value to
+/// the given callee. The original call site is moved into the "else" block,
+/// and a clone of the call site is placed in the "then" block. The cloned
+/// instruction is returned.
CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index d0cf0792eface0..ea855b9a4d8416 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -185,6 +187,24 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
U->replaceUsesOfWith(&CB, Cast);
}
+// Returns the OR of all icmp results in `ICmps`, combining them pairwise.
+static Value *getOrResult(const SmallVector<Value *, 2> &ICmps,
+ IRBuilder<> &Builder) {
+ assert(!ICmps.empty() && "Must have at least one icmp instructions");
+ if (ICmps.size() == 1)
+ return ICmps[0];
+
+ SmallVector<Value *, 2> OrResults;
+ int i = 0, NumICmp = ICmps.size();
+ for (i = 0; i + 1 < NumICmp; i += 2)
+ OrResults.push_back(Builder.CreateOr(ICmps[i], ICmps[i + 1], "icmp-or"));
+
+ if (i < NumICmp)
+ OrResults.push_back(ICmps[i]);
+
+ return getOrResult(OrResults, Builder);
+}
+
/// Predicate and clone the given call site.
///
/// This function creates an if-then-else structure at the location of the call
@@ -276,8 +296,8 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// ; The original call instruction stays in its original block.
/// %t0 = musttail call i32 %ptr()
/// ret %t0
-static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
- MDNode *BranchWeights) {
+CallBase &llvm::versionCallSiteWithCond(CallBase &CB, Value *Cond,
+ MDNode *BranchWeights) {
IRBuilder<> Builder(&CB);
CallBase *OrigInst = &CB;
@@ -565,6 +585,46 @@ CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
return promoteCall(NewInst, Callee);
}
+Constant *llvm::getVTableAddressPointOffset(GlobalVariable *VTable,
+ uint32_t AddressPointOffset) {
+ Module &M = *VTable->getParent();
+ const DataLayout &DL = M.getDataLayout();
+ LLVMContext &Context = M.getContext();
+ Type *VTableType = VTable->getValueType();
+ assert(AddressPointOffset < DL.getTypeAllocSize(VTableType) &&
+ "Out-of-bound access");
+ APInt AddressPointOffsetAPInt(32, AddressPointOffset, false);
+ SmallVector<APInt> Indices =
+ DL.getGEPIndicesForOffset(VTableType, AddressPointOffsetAPInt);
+ SmallVector<llvm::Constant *> GEPIndices;
+ for (const auto &Index : Indices)
+ GEPIndices.push_back(llvm::ConstantInt::get(Type::getInt32Ty(Context),
+ Index.getZExtValue()));
+
+ return ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(), VTable,
+ GEPIndices);
+}
+
+CallBase &llvm::promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr,
+ Function *Callee,
+ ArrayRef<Constant *> AddressPoints,
+ MDNode *BranchWeights) {
+ assert(!AddressPoints.empty() && "Caller should guarantee");
+ IRBuilder<> Builder(&CB);
+ SmallVector<Value *, 2> ICmps;
+ for (auto &AddressPoint : AddressPoints)
+ ICmps.push_back(Builder.CreateICmpEQ(VPtr, AddressPoint));
+
+ Value *Cond = getOrResult(ICmps, Builder);
+
+ // Version the indirect call site. If Cond is true, 'NewInst' will be
+ // executed, otherwise the original call site will be executed.
+ CallBase &NewInst = versionCallSiteWithCond(CB, Cond, BranchWeights);
+
+ // Promote 'NewInst' so that it directly calls the desired function.
+ return promoteCall(NewInst, Callee);
+}
+
bool llvm::tryPromoteCall(CallBase &CB) {
assert(!CB.getCalledFunction());
Module *M = CB.getCaller()->getParent();
diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
index eff8e27d36d641..c57abb54e46849 100644
--- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
@@ -8,9 +8,12 @@
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
@@ -368,3 +371,127 @@ declare %struct2 @_ZN4Impl3RunEv(%class.Impl* %this)
bool IsPromoted = tryPromoteCall(*CI);
EXPECT_FALSE(IsPromoted);
}
+
+// Checks that byte offsets 16, 40 and 64 into _ZTV8Derived2 are turned into
+// GEPs with indices {0, 0, 2}, {0, 1, 2} and {0, 2, 2} respectively.
+TEST(CallPromotionUtilsTest, getVTableAddressPointOffset) {
+ LLVMContext C;
+ std::unique_ptr<Module> M = parseIR(C,
+ R"IR(
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }
+
+declare i32 @_ZN5Base15func1Ev(ptr)
+declare i32 @_ZN5Base25func2Ev(ptr)
+declare i32 @_ZN5Base15func0Ev(ptr)
+declare void @_ZN5Base35func3Ev(ptr)
+)IR");
+ GlobalVariable *GV = M->getGlobalVariable("_ZTV8Derived2");
+ Type *Int32Ty = Type::getInt32Ty(C);
+
+ for (auto [AddressPointOffset, Index] :
+ {std::pair{16, 0}, {40, 1}, {64, 2}}) {
+ ConstantExpr *GEP = dyn_cast<ConstantExpr>(
+ getVTableAddressPointOffset(GV, AddressPointOffset));
+ ASSERT_TRUE(GEP);
+
+ // Expected indices: {0, <index of the vtable array>, 2}.
+ SmallVector<Constant *> Indices = {
+ llvm::ConstantInt::get(Int32Ty, 0U),
+ llvm::ConstantInt::get(Int32Ty, Index),
+ llvm::ConstantInt::get(Int32Ty, 2U)};
+ Constant *Expected =
+ ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, Indices);
+ EXPECT_EQ(GEP, Expected);
+ }
+}
+
+// Promotes the indirect call in @testfunc by comparing its vtable pointer
+// against three vtable address points, then checks the returned call and the
+// shape of the entry block.
+TEST(CallPromotionUtilsTest, promoteCallWithVTableCmp) {
+ LLVMContext C;
+ std::unique_ptr<Module> M = parseIR(C,
+ R"IR(
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV5Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !0
+ at _ZTV8Derived1 = constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev], [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !1, !type !2, !type !3
+ at _ZTV5Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !2
+ at _ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !4, !type !5, !type !6, !type !7
+ at _ZTV5Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev] }, !type !6
+
+define i32 @testfunc(ptr %d) {
+entry:
+ %vtable = load ptr, ptr %d, !prof !8
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS5Base1")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ %call = tail call i32 %1(ptr %d), !prof !9
+ ret i32 %call
+}
+
+define i32 @_ZN5Base15func1Ev(ptr %this) {
+entry:
+ ret i32 2
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @_ZN5Base25func2Ev(ptr)
+declare i32 @_ZN5Base15func0Ev(ptr)
+declare void @_ZN5Base35func3Ev(ptr)
+
+!0 = !{i64 16, !"_ZTS5Base1"}
+!1 = !{i64 16, !"_ZTS5Base1"}
+!2 = !{i64 48, !"_ZTS5Base2"}
+!3 = !{i64 16, !"_ZTS8Derived1"}
+!4 = !{i64 64, !"_ZTS5Base1"}
+!5 = !{i64 40, !"_ZTS5Base2"}
+!6 = !{i64 16, !"_ZTS5Base3"}
+!7 = !{i64 16, !"_ZTS8Derived2"}
+!8 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 800, i64 5035968517245772950, i64 500, i64 3215870116411581797, i64 300}
+!9 = !{!"VP", i32 0, i64 1600, i64 6804820478065511155, i64 1600})IR");
+
+ Function *F = M->getFunction("testfunc");
+ ASSERT_TRUE(F);
+ // The indirect call is the second-to-last instruction of the entry block.
+ CallInst *CI = dyn_cast<CallInst>(&*std::next(F->front().rbegin()));
+ ASSERT_TRUE(CI && CI->isIndirectCall());
+
+ // The callee is loaded through a GEP; keep both to check where they end up.
+ LoadInst *FuncPtr = dyn_cast<LoadInst>(CI->getCalledOperand());
+ ASSERT_TRUE(FuncPtr);
+ GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(FuncPtr->getPointerOperand());
+ ASSERT_TRUE(GEP);
+
+ // The vtable pointer is loaded by the first instruction of the entry block.
+ LoadInst *VPtr = dyn_cast<LoadInst>(&*F->front().begin());
+ Function *Callee = M->getFunction("_ZN5Base15func1Ev");
+
+ // Address points of the three vtables the vtable pointer is compared with.
+ SmallVector<Constant *, 3> VTableAddressPoints;
+ for (auto &[VTableName, AddressPointOffset] : {std::pair{"_ZTV5Base1", 16},
+ {"_ZTV8Derived1", 16},
+ {"_ZTV8Derived2", 64}}) {
+ GlobalVariable *VTable = M->getGlobalVariable(VTableName);
+ VTableAddressPoints.push_back(
+ getVTableAddressPointOffset(VTable, AddressPointOffset));
+ }
+
+ MDNode *BranchWeights = MDBuilder(C).createBranchWeights(1600, 0);
+
+ const size_t EntrySizeBefore = F->front().size();
+
+ // The helper must return the promoted direct call.
+ CallBase &DirectCB = promoteCallWithVTableCmp(
+ *CI, VPtr, Callee, VTableAddressPoints, BranchWeights);
+ EXPECT_EQ(DirectCB.getCalledOperand(), Callee);
+
+ // GEP and FuncPtr are expected to be in the entry block after promotion.
+ BasicBlock *EntryBB = &F->front();
+ EXPECT_EQ(EntryBB, GEP->getParent());
+ EXPECT_EQ(EntryBB, FuncPtr->getParent());
+
+ // The entry block is expected to grow by four instructions: promotion
+ // inserts 3 icmp instructions and 2 or instructions, and removes 1 call
+ // instruction from it.
+ EXPECT_EQ(F->front().size(), EntrySizeBefore + 4);
+}
>From 29d9cd2f128da0adde011a0a8362ec252104c901 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sat, 10 Feb 2024 15:21:49 -0800
Subject: [PATCH 5/5] [TypeProf][IndirectCallPromotion]Implement vtable-based
transformation
---
.../Analysis/IndirectCallPromotionAnalysis.h | 2 +-
.../IndirectCallPromotionAnalysis.cpp | 6 +-
.../Instrumentation/IndirectCallPromotion.cpp | 391 +++++++++++++++++-
.../Transforms/PGOProfile/icp_vtable_cmp.ll | 206 +++++++++
.../PGOProfile/icp_vtable_invoke.ll | 201 +++++++++
.../PGOProfile/icp_vtable_tail_call.ll | 92 +++++
6 files changed, 876 insertions(+), 22 deletions(-)
create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index 8a05e913a91063..eda672d7d50ee2 100644
--- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -57,7 +57,7 @@ class ICallPromotionAnalysis {
///
/// The returned array space is owned by this class, and overwritten on
/// subsequent calls.
- ArrayRef<InstrProfValueData>
+ MutableArrayRef<InstrProfValueData>
getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals,
uint64_t &TotalCount,
uint32_t &NumCandidates);
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ab53717eb889a0..643c155ba6d7e3 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -87,7 +87,7 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
return I;
}
-ArrayRef<InstrProfValueData>
+MutableArrayRef<InstrProfValueData>
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount,
uint32_t &NumCandidates) {
@@ -96,8 +96,8 @@ ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
ValueDataArray.get(), NumVals, TotalCount);
if (!Res) {
NumCandidates = 0;
- return ArrayRef<InstrProfValueData>();
+ return MutableArrayRef<InstrProfValueData>();
}
NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount);
- return ArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
+ return MutableArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
}
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 6a44a32bb34dc9..85af3d7cc56b7a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,13 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -51,6 +54,8 @@ using namespace llvm;
STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
// Command line option to disable indirect-call promotion with the default as
// false. This is for debug purpose.
static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
@@ -103,13 +108,71 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
+// This option is meant for LLVM regression tests that exercise the
+// transformation that compares vtables; it is hidden and defaults to false.
+// TODO: The ICP pass should do a cost-benefit analysis between function-based
+// comparison and vtable-based comparison and choose one of the two
+// transformations.
+static cl::opt<bool> ICPEnableVTableCmp(
+ "icp-enable-vtable-cmp", cl::init(false), cl::Hidden,
+ cl::desc("If ThinLTO and WPD is enabled and this option is true, "
+ "indirect-call promotion pass will compare vtables rather than "
+ "functions for speculative devirtualization of virtual calls."
+ " If set to false, indirect-call promotion pass will always "
+ "compare functions."));
+
namespace {
+using VTableAddressPointOffsetValMap =
+ SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>;
+
+// Type information collected for one virtual call site.
+struct VirtualCallSiteInfo {
+ // The offset from the vtable address point to the called virtual function.
+ uint64_t FunctionOffset;
+ // The instruction that produces the vtable address point used by the call.
+ Instruction *VPtr;
+ // The type id string taken from the llvm.type.test call for this call site.
+ StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+ SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+
+// Scans the type metadata attached to a vtable for the first entry whose type
+// id string equals \p CompatibleType and returns the offset stored in that
+// entry. Returns std::nullopt when \p Types is empty or has no matching entry.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const ArrayRef<MDNode *> &Types,
+ StringRef CompatibleType) {
+ for (MDNode *Type : Types) {
+ // Operand 1 is the type id; entries whose id is not a string never match.
+ auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
+ if (!TypeId || TypeId->getString() != CompatibleType)
+ continue;
+
+ // Operand 0 is the offset that goes with the matching type id.
+ return cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+ }
+ return std::nullopt;
+}
+
// Promote indirect calls to conditional direct calls, keeping track of
// thresholds.
class IndirectCallPromoter {
private:
Function &F;
+ Module &M;
// Symtab that maps indirect call profile values to function names and
// defines.
@@ -117,6 +180,11 @@ class IndirectCallPromoter {
const bool SamplePGO;
+ // A map from a virtual call to its type information.
+ const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
+
+ VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
+
OptimizationRemarkEmitter &ORE;
// A struct that records the direct target and it's call count.
@@ -124,9 +192,17 @@ class IndirectCallPromoter {
Function *const TargetFunction;
const uint64_t Count;
+ uint64_t FunctionOffset;
+
+ SmallVector<std::pair<uint64_t, uint64_t>, 2> VTableGUIDAndCounts;
+
+ SmallVector<Constant *, 2> AddressPoints;
+
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
};
+ using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 4>;
+
// Check if the indirect-call call site should be promoted. Return the number
// of promotions. Inst is the candidate indirect call, ValueDataRef
// contains the array of value profile data for profiled targets,
@@ -134,7 +210,8 @@ class IndirectCallPromoter {
// NumCandidates is the number of candidate entries in ValueDataRef.
std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
- uint64_t TotalCount, uint32_t NumCandidates);
+ uint64_t TotalCount, uint32_t NumCandidates,
+ VTableGUIDCountsMap &VTableGUIDCounts);
// Promote a list of targets for one indirect-call callsite by comparing
// indirect callee with functions. Returns true if there are IR
@@ -144,10 +221,33 @@ class IndirectCallPromoter {
uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
uint32_t NumCandidates);
+ bool tryToPromoteWithVTableCmp(
+ CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalFuncCount, uint32_t NumCandidates,
+ MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ VTableGUIDCountsMap &VTableGUIDCounts);
+
+ void
+ tryGetVTableInfos(const CallBase &CB,
+ const SmallDenseMap<Function *, int, 4> &CalleeIndexMap,
+ VTableGUIDCountsMap &VTableGUIDCounts,
+ std::vector<PromotionCandidate> &Candidates);
+
+ Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
+ uint64_t AddressPointOffset);
+
+ bool isProfitableToCompareVTables(
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount);
+
public:
- IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
- OptimizationRemarkEmitter &ORE)
- : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+ IndirectCallPromoter(
+ Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
+ const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
+ VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+ OptimizationRemarkEmitter &ORE)
+ : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
+ VirtualCSInfo(VirtualCSInfo),
+ VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
@@ -161,9 +261,12 @@ class IndirectCallPromoter {
std::vector<IndirectCallPromoter::PromotionCandidate>
IndirectCallPromoter::getPromotionCandidatesForCallSite(
const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
- uint64_t TotalCount, uint32_t NumCandidates) {
+ uint64_t TotalCount, uint32_t NumCandidates,
+ VTableGUIDCountsMap &VTableGUIDCounts) {
std::vector<PromotionCandidate> Ret;
+ SmallDenseMap<Function *, int, 4> CalleeIndexMap;
+
LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
<< " Num_targets: " << ValueDataRef.size()
<< " Num_candidates: " << NumCandidates << "\n");
@@ -237,30 +340,114 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
break;
}
+ CalleeIndexMap[TargetFunction] = Ret.size();
Ret.push_back(PromotionCandidate(TargetFunction, Count));
+
TotalCount -= Count;
}
+
+ if (!ICPEnableVTableCmp)
+ return Ret;
+
+ tryGetVTableInfos(CB, CalleeIndexMap, VTableGUIDCounts, Ret);
+
+ return Ret;
+}
+
+// Returns the cached address-point constant for (\p GV, \p AddressPointOffset)
+// from VTableAddressPointOffsetVal, creating and caching it on first use.
+Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
+ GlobalVariable *GV, uint64_t AddressPointOffset) {
+ // The subscript default-inserts a null entry, which marks "not created yet".
+ Constant *&Ret = VTableAddressPointOffsetVal[GV][AddressPointOffset];
+ if (Ret == nullptr)
+ Ret = getVTableAddressPointOffset(GV, AddressPointOffset);
return Ret;
}
+// Reads the vtable value profiles attached to the vptr instruction of the
+// virtual call \p CB. The count of every profiled vtable is stored in
+// \p GUIDCountsMap. When the function found in a vtable at the call's offset
+// is one of the promotion candidates (per \p CalleeIndexMap), the vtable's
+// GUID, count and address point are also recorded on that candidate in
+// \p Candidates. Does nothing when vtable comparison is disabled or \p CB has
+// no entry in VirtualCSInfo.
+void IndirectCallPromoter::tryGetVTableInfos(
+ const CallBase &CB, const SmallDenseMap<Function *, int, 4> &CalleeIndexMap,
+ VTableGUIDCountsMap &GUIDCountsMap,
+ std::vector<PromotionCandidate> &Candidates) {
+ if (!ICPEnableVTableCmp)
+ return;
+
+ auto CSInfoIter = VirtualCSInfo.find(&CB);
+ if (CSInfoIter == VirtualCSInfo.end())
+ return;
+ const VirtualCallSiteInfo &CSInfo = CSInfoIter->second;
+
+ uint32_t NumVTableValues = 0;
+ uint64_t TotalVTableCount = 0;
+ auto VTableValueDataArray = getValueProfDataFromInst(
+ *CSInfo.VPtr, IPVK_VTableTarget, MaxNumVTableAnnotations,
+ NumVTableValues, TotalVTableCount);
+ if (!VTableValueDataArray)
+ return;
+
+ // Scratch buffer for the type metadata of one vtable; reused per iteration.
+ SmallVector<MDNode *, 2> Types;
+ // Map each profiled vtable to the function it holds at the call's offset.
+ for (size_t I = 0; I != NumVTableValues; ++I) {
+ const uint64_t VTableGUID = VTableValueDataArray[I].Value;
+ const uint64_t VTableCount = VTableValueDataArray[I].Count;
+ // The count is recorded even if the vtable cannot be resolved below.
+ GUIDCountsMap[VTableGUID] = VTableCount;
+
+ GlobalVariable *VTableVariable = Symtab->getGlobalVariable(VTableGUID);
+ if (!VTableVariable) {
+ LLVM_DEBUG(dbgs() << "\tCannot find vtable definition for " << VTableGUID
+ << "\n");
+ continue;
+ }
+
+ Types.clear();
+ VTableVariable->getMetadata(LLVMContext::MD_type, Types);
+ std::optional<uint64_t> AddressPointOffset =
+ getCompatibleTypeOffset(Types, CSInfo.CompatibleTypeStr);
+ if (!AddressPointOffset)
+ continue;
+
+ // Only the function part of the lookup result is used.
+ Function *Callee = nullptr;
+ std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
+ VTableVariable, *AddressPointOffset + CSInfo.FunctionOffset,
+ *(F.getParent()));
+ if (!Callee)
+ continue;
+
+ auto CalleeIndexIter = CalleeIndexMap.find(Callee);
+ if (CalleeIndexIter == CalleeIndexMap.end())
+ continue;
+
+ PromotionCandidate &Candidate = Candidates[CalleeIndexIter->second];
+ Candidate.VTableGUIDAndCounts.push_back({VTableGUID, VTableCount});
+ Candidate.AddressPoints.push_back(
+ getOrCreateVTableAddressPointVar(VTableVariable, *AddressPointOffset));
+ }
+}
+
+// Builds branch-weight metadata for an if/else split with the given counts.
+// Both counts are scaled with the factor calculateCountScale() returns for
+// the larger of the two.
+static MDNode *getBranchWeights(LLVMContext &Context, uint64_t IfCount,
+ uint64_t ElseCount) {
+ const uint64_t MaxCount = IfCount >= ElseCount ? IfCount : ElseCount;
+ const uint64_t Scale = calculateCountScale(MaxCount);
+ const auto IfWeight = scaleBranchCount(IfCount, Scale);
+ const auto ElseWeight = scaleBranchCount(ElseCount, Scale);
+ return MDBuilder(Context).createBranchWeights(IfWeight, ElseWeight);
+}
+
CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
uint64_t Count, uint64_t TotalCount,
bool AttachProfToDirectCall,
OptimizationRemarkEmitter *ORE) {
-
- uint64_t ElseCount = TotalCount - Count;
- uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
- uint64_t Scale = calculateCountScale(MaxCount);
- MDBuilder MDB(CB.getContext());
- MDNode *BranchWeights = MDB.createBranchWeights(
- scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+ MDNode *BranchWeights =
+ getBranchWeights(CB.getContext(), Count, TotalCount - Count);
CallBase &NewInst =
promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
- if (AttachProfToDirectCall) {
+ if (AttachProfToDirectCall)
setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
- }
using namespace ore;
@@ -304,6 +491,80 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
return Changed;
}
+// Promotes \p CB once per entry in \p Candidates by comparing the vtable
+// pointer of the call against the candidate's address points, then rewrites
+// the value profile metadata on the call and on the vptr instruction so that
+// it reflects the counts left after promotion.
+//
+// \p TotalFuncCount is the total count of the call site, \p ICallProfDataRef
+// holds the call's value profile data (updated in place), and
+// \p VTableGUIDCounts maps vtable GUIDs to counts (decremented in place).
+// Returns true if at least one candidate was promoted.
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+ CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t TotalFuncCount, uint32_t NumCandidates,
+ MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ VTableGUIDCountsMap &VTableGUIDCounts) {
+ Instruction *VPtr = VirtualCSInfo.at(&CB).VPtr;
+
+ // Promoted counts are 64-bit profile counts; store them at full width so
+ // that large counts are not truncated before being subtracted below.
+ SmallVector<uint64_t, 4> PromotedFuncCount;
+ for (const auto &Candidate : Candidates) {
+ uint64_t IfCount = 0;
+ // FIXME: Skip vtables with cold count in the comparison.
+ for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) {
+ IfCount += Count;
+ VTableGUIDCounts[GUID] -= Count;
+ }
+
+ promoteCallWithVTableCmp(
+ CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+ getBranchWeights(CB.getContext(), IfCount, TotalFuncCount - IfCount));
+
+ PromotedFuncCount.push_back(IfCount);
+
+ TotalFuncCount -= IfCount;
+ NumOfPGOICallPromotion++;
+ }
+
+ if (PromotedFuncCount.empty())
+ return false;
+
+ // A comparator that sorts value profile data descendingly.
+ auto Cmp = [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ };
+
+ CB.setMetadata(LLVMContext::MD_prof, nullptr);
+ // Update indirect call value profiles if total count of the call site is not
+ // zero.
+ if (TotalFuncCount != 0) {
+ // The I-th promoted count is subtracted from the I-th profile entry.
+ for (size_t I = 0; I < PromotedFuncCount.size(); I++)
+ ICallProfDataRef[I].Count -= PromotedFuncCount[I];
+
+ llvm::sort(ICallProfDataRef.begin(), ICallProfDataRef.end(), Cmp);
+
+ // Locate the first <target, count> pair where the count is zero or less.
+ auto UB = llvm::upper_bound(
+ ICallProfDataRef, 0U,
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
+ return ProfData.Count <= Count;
+ });
+
+ ArrayRef<InstrProfValueData> VDs(ICallProfDataRef.begin(), UB);
+ annotateValueSite(M, CB, VDs, TotalFuncCount, IPVK_IndirectCallTarget,
+ NumCandidates);
+ }
+
+ // Update vtable profile metadata: drop the old annotation and re-annotate
+ // the vptr instruction with the vtables that still have a non-zero count.
+ VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+ std::vector<InstrProfValueData> VTableValueProfiles;
+ uint64_t TotalVTableCount = 0;
+ for (auto [GUID, Count] : VTableGUIDCounts) {
+ if (Count == 0)
+ continue;
+
+ VTableValueProfiles.push_back({GUID, Count});
+ TotalVTableCount += Count;
+ }
+ llvm::sort(VTableValueProfiles, Cmp);
+
+ annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+ IPVK_VTableTarget, VTableValueProfiles.size());
+
+ return true;
+}
+
// Traverse all the indirect-call callsite and get the value profile
// annotation to perform indirect-call promotion.
bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
@@ -317,14 +578,96 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
if (!NumCandidates ||
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
continue;
+ VTableGUIDCountsMap VTableGUIDCounts;
auto PromotionCandidates = getPromotionCandidatesForCallSite(
- *CB, ICallProfDataRef, TotalCount, NumCandidates);
- Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
- ICallProfDataRef, NumCandidates);
+ *CB, ICallProfDataRef, TotalCount, NumCandidates, VTableGUIDCounts);
+
+ if (isProfitableToCompareVTables(PromotionCandidates, TotalCount))
+ Changed |= tryToPromoteWithVTableCmp(*CB, PromotionCandidates, TotalCount,
+ NumCandidates, ICallProfDataRef,
+ VTableGUIDCounts);
+ else
+ Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
+ ICallProfDataRef, NumCandidates);
}
return Changed;
}
+// Tells whether the call site should be promoted by comparing vtables rather
+// than functions. For now the answer is simply the value of the
+// -icp-enable-vtable-cmp option; both parameters are unused.
+bool IndirectCallPromoter::isProfitableToCompareVTables(
+ const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) {
+ // FIXME: Implement cost-benefit analysis in a follow-up change.
+ return ICPEnableVTableCmp;
+}
+
+// Fills \p VirtualCSInfo with one entry per call site that
+// findDevirtualizableCallsForTypeTest reports for an llvm.type.test call in
+// \p M and for which PGOIndirectCallVisitor::tryGetVTableInstruction finds an
+// instruction. Each entry records the function offset, that instruction and
+// the type id string of the type test. Leaves the map untouched when the
+// module has no llvm.type.test declaration or the declaration has no uses.
+static void
+computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
+ VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+
+ // Right now only llvm.type.test is used to find out virtual call sites.
+ // With ThinLTO and whole-program-devirtualization, llvm.type.test and
+ // llvm.public.type.test are emitted, and llvm.public.type.test is either
+ // refined to llvm.type.test or dropped before indirect-call-promotion pass.
+ //
+ // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
+ // Find out virtual calls by looking at users of llvm.type.checked.load in
+ // that case.
+ Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+
+ if (!TypeTestFunc || TypeTestFunc->use_empty())
+ return;
+
+ // Iterate all type.test calls and find all indirect calls.
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
+ if (!CI)
+ continue;
+
+ // Use dyn_cast so that the null check below is meaningful; cast<> asserts
+ // on a type mismatch instead of returning null.
+ auto *TypeMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeMDVal)
+ continue;
+
+ auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!CompatibleTypeId)
+ continue;
+
+ StringRef CompatibleTypeStr = CompatibleTypeId->getString();
+
+ // Find out all devirtualizable call sites given a llvm.type.test intrinsic
+ // call.
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<CallInst *, 1> Assumes;
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+
+ // type-id, offset from the address point
+ // combined with type metadata to compute function offset
+ for (auto &DevirtCall : DevirtCalls) {
+ CallBase &CB = DevirtCall.CB;
+ // This is the offset from the address point offset to the virtual
+ // function.
+ uint64_t Offset = DevirtCall.Offset;
+
+ // Given an indirect call, try find the instruction which loads a pointer
+ // to virtual table.
+ Instruction *VTablePtr =
+ PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
+
+ if (!VTablePtr)
+ continue;
+
+ VirtualCSInfo[&CB] = {Offset, VTablePtr, CompatibleTypeStr};
+ }
+ }
+}
+
// A wrapper function that does the actual work.
static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
bool SamplePGO, ModuleAnalysisManager &MAM) {
@@ -337,6 +680,17 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
return false;
}
bool Changed = false;
+ VirtualCallSiteTypeInfoMap VirtualCSInfo;
+
+ computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+
+ // This map records states across functions in an LLVM IR module.
+ // IndirectCallPromoter processes one
+ // function at a time and updates this map with new entries the first time
+ // the entry is needed in the module; the subsequent functions could re-use
+ // map entries inserted when processing prior functions.
+ VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
+
for (auto &F : M) {
if (F.isDeclaration() || F.hasOptNone())
continue;
@@ -345,7 +699,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
+ IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
+ VTableAddressPointOffsetVal, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
new file mode 100644
index 00000000000000..75eda4b66be025
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -0,0 +1,206 @@
+
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+
+; Invoke instcombine after pgo-icall-prom so the address calculation instructions for virtual calls get sunk into the basic block for the indirect fallback.
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV4Base = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+ at _ZTV8Derived1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+ at _ZTV8Derived2 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+ at _ZTV8Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived35func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !3
+
+; Test the IR transformation from function-based indirect-call promotion and vtable-based indirect-call promotion.
+
+; The tested function has one function candidate which comes from one vtable.
+define i32 @test_one_function_one_vtable(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_one_vtable(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT: entry:
+; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC: if.true.direct_targ:
+; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC: if.false.orig_indirect:
+; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
+; ICALL-FUNC: if.end.icp:
+; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_vtable(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT: entry:
+; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: br i1 [[TMP1]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE: if.true.direct_targ:
+; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE: if.false.orig_indirect:
+; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP3]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
+; ICALL-VTABLE: if.end.icp:
+; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP2]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT: ret i32 [[TMP4]]
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !4
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ %call = tail call i32 %1(ptr %d), !prof !5
+ ret i32 %call
+}
+
+; The tested function has one function candidate which comes from two vtables.
+define i32 @test_one_function_two_vtables(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_two_vtables(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT: entry:
+; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
+; ICALL-FUNC: if.true.direct_targ:
+; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC: if.false.orig_indirect:
+; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
+; ICALL-FUNC: if.end.icp:
+; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_two_vtables(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT: entry:
+; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived1, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: [[ICMP_OR:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; ICALL-VTABLE-NEXT: br i1 [[ICMP_OR]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4]]
+; ICALL-VTABLE: if.true.direct_targ:
+; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE: if.false.orig_indirect:
+; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP4]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
+; ICALL-VTABLE: if.end.icp:
+; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT: ret i32 [[TMP5]]
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !6
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ %call = tail call i32 %1(ptr %d), !prof !5
+ ret i32 %call
+}
+
+; The tested function has one function candidate which comes from three vtables.
+define i32 @test_one_function_three_vtables(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_three_vtables(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT: entry:
+; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF7:![0-9]+]]
+; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
+; ICALL-FUNC: if.true.direct_targ:
+; ICALL-FUNC-NEXT: [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC: if.false.orig_indirect:
+; ICALL-FUNC-NEXT: [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
+; ICALL-FUNC: if.end.icp:
+; ICALL-FUNC-NEXT: [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT: ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_three_vtables(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT: entry:
+; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived1, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: [[ICMP_OR:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; ICALL-VTABLE-NEXT: [[ICMP_OR1:%.*]] = or i1 [[ICMP_OR]], [[TMP3]]
+; ICALL-VTABLE-NEXT: br i1 [[ICMP_OR1]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4]]
+; ICALL-VTABLE: if.true.direct_targ:
+; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE: if.false.orig_indirect:
+; ICALL-VTABLE-NEXT: [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT: [[CALL:%.*]] = tail call i32 [[TMP5]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
+; ICALL-VTABLE: if.end.icp:
+; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP4]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT: ret i32 [[TMP6]]
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !7
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ %call = tail call i32 %1(ptr %d), !prof !5
+ ret i32 %call
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
+declare i32 @_ZN4Base5func1Ei(ptr, i32)
+declare i32 @_ZN8Derived15func1Ei(ptr, i32)
+declare i32 @_ZN8Derived25func1Ei(ptr, i32)
+declare i32 @_ZN8Derived35func1Ei(ptr, i32)
+
+define i32 @_ZN4Base5func2Ev(ptr %this) {
+entry:
+ ret i32 0
+}
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTS8Derived1"}
+!2 = !{i64 16, !"_ZTS8Derived2"}
+!3 = !{i64 16, !"_ZTS8Derived3"}
+!4 = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+!5 = !{!"VP", i32 0, i64 1600, i64 -3104805163612457913, i64 1600}
+!6 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 1000, i64 5035968517245772950, i64 600}
+!7 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 600, i64 5035968517245772950, i64 550, i64 1960855528937986108, i64 450}
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 1600, i32 0}
+; ICALL-FUNC: [[PROF6]] = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 1000, i64 5035968517245772950, i64 600}
+; ICALL-FUNC: [[PROF7]] = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 600, i64 5035968517245772950, i64 550, i64 1960855528937986108, i64 450}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 1600, i32 0}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
new file mode 100644
index 00000000000000..a2924420fd2a06
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.Error = type { i8 }
+
+ at _ZTI5Error = dso_local constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr null, i64 2), ptr null }
+ at _ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1
+ at _ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3
+
+ at .str = private unnamed_addr constant [15 x i8] c"out of tickets\00"
+
+define i32 @_Z4testP4Base(ptr %b) personality ptr @__gxx_personality_v0 {
+; ICALL-FUNC-LABEL: define i32 @_Z4testP4Base(
+; ICALL-FUNC-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-FUNC-NEXT: entry:
+; ICALL-FUNC-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived10get_ticketEv
+; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC: if.true.direct_targ:
+; ICALL-FUNC-NEXT: [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]])
+; ICALL-FUNC-NEXT: to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-FUNC: if.false.orig_indirect:
+; ICALL-FUNC-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base10get_ticketEv
+; ICALL-FUNC-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC: if.true.direct_targ1:
+; ICALL-FUNC-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]])
+; ICALL-FUNC-NEXT: to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-FUNC: if.false.orig_indirect2:
+; ICALL-FUNC-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]])
+; ICALL-FUNC-NEXT: to label [[IF_END_ICP3]] unwind label [[LPAD]]
+; ICALL-FUNC: if.end.icp3:
+; ICALL-FUNC-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-FUNC-NEXT: br label [[IF_END_ICP]]
+; ICALL-FUNC: if.end.icp:
+; ICALL-FUNC-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT: br label %try.cont
+; ICALL-FUNC: lpad:
+
+;
+; ICALL-VTABLE-LABEL: define i32 @_Z4testP4Base(
+; ICALL-VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-VTABLE-NEXT: entry:
+; ICALL-VTABLE-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
+; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV7Derived, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE: if.true.direct_targ:
+; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT: to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-VTABLE: if.false.orig_indirect:
+; ICALL-VTABLE-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE: if.true.direct_targ1:
+; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT: to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-VTABLE: if.false.orig_indirect2:
+; ICALL-VTABLE-NEXT: [[CALL:%.*]] = invoke i32 [[TMP1]](ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT: to label [[IF_END_ICP3]] unwind label [[LPAD]]
+; ICALL-VTABLE: if.end.icp3:
+; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-VTABLE-NEXT: br label [[IF_END_ICP]]
+; ICALL-VTABLE: if.end.icp:
+; ICALL-VTABLE-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT: br label %try.cont
+; ICALL-VTABLE: lpad:
+;
+entry:
+ %e = alloca %class.Error
+ %vtable = load ptr, ptr %b, !prof !4
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %1 = load ptr, ptr %vtable
+ %call = invoke i32 %1(ptr %b)
+ to label %try.cont unwind label %lpad, !prof !5
+
+lpad:
+ %2 = landingpad { ptr, i32 }
+ cleanup
+ catch ptr @_ZTI5Error
+ %3 = extractvalue { ptr, i32 } %2, 1
+ %4 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
+ %matches = icmp eq i32 %3, %4
+ br i1 %matches, label %catch, label %ehcleanup
+
+catch:
+ %5 = extractvalue { ptr, i32 } %2, 0
+
+ %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1) %e)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+ call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+ %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]
+ ret i32 %ret.0
+
+lpad1:
+ %6 = landingpad { ptr, i32 }
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:
+ br label %ehcleanup
+
+ehcleanup:
+ %lpad.val7.merged = phi { ptr, i32 } [ %6, %invoke.cont4 ], [ %2, %lpad ]
+ resume { ptr, i32 } %lpad.val7.merged
+
+terminate.lpad:
+ %7 = landingpad { ptr, i32 }
+ catch ptr null
+ %8 = extractvalue { ptr, i32 } %7, 0
+ unreachable
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(ptr)
+
+declare i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1))
+
+declare void @__cxa_end_catch()
+
+define i32 @_ZN4Base10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {
+entry:
+ %call = tail call i32 @_Z13get_ticket_idv()
+ %cmp.not = icmp eq i32 %call, -1
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:
+ ret i32 %call
+
+if.end:
+ %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+ invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ unreachable
+
+lpad:
+ %0 = landingpad { ptr, i32 }
+ cleanup
+ resume { ptr, i32 } %0
+}
+
+define i32 @_ZN7Derived10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {
+entry:
+ %call = tail call i32 @_Z13get_ticket_idv()
+ %cmp.not = icmp eq i32 %call, -1
+ br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:
+ ret i32 %call
+
+if.end:
+ %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+ invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 2)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ unreachable
+
+lpad:
+ %0 = landingpad { ptr, i32 }
+ cleanup
+ resume { ptr, i32 } %0
+}
+
+declare i32 @_Z13get_ticket_idv()
+declare ptr @__cxa_allocate_exception(i64)
+declare void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1), ptr, i32)
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
+!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700}
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 -4576307468236080025, i64 900, i64 1960855528937986108, i64 700}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 700, i32 0}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
new file mode 100644
index 00000000000000..94ed588c5458d8
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks=pgo-icall-prom -S 2>&1 | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -S 2>&1 | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, align 8, !type !0, !type !1, !type !2, !type !3
+ at _ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, align 8, !type !0, !type !1
+
+define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) {
+; ICALL-FUNC-LABEL: define i32 @test_tail_call(
+; ICALL-FUNC-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; ICALL-FUNC-NEXT: entry:
+; ICALL-FUNC-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived5func1Eii
+; ICALL-FUNC-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC: if.true.direct_targ:
+; ICALL-FUNC-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT: ret i32 [[TMP3]]
+; ICALL-FUNC: 4:
+; ICALL-FUNC-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Eii
+; ICALL-FUNC-NEXT: br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC: if.true.direct_targ1:
+; ICALL-FUNC-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT: ret i32 [[TMP6]]
+; ICALL-FUNC: 7:
+; ICALL-FUNC-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT: ret i32 [[CALL]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_tail_call(
+; ICALL-VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; ICALL-VTABLE-NEXT: entry:
+; ICALL-VTABLE-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8
+; ICALL-VTABLE-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV7Derived, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE: if.true.direct_targ:
+; ICALL-VTABLE-NEXT: [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT: ret i32 [[TMP3]]
+; ICALL-VTABLE: 4:
+; ICALL-VTABLE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT: br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE: if.true.direct_targ1:
+; ICALL-VTABLE-NEXT: [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT: ret i32 [[TMP6]]
+; ICALL-VTABLE: 7:
+; ICALL-VTABLE-NEXT: [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT: ret i32 [[CALL]]
+;
+entry:
+ %vtable = load ptr, ptr %ptr, !prof !4
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+ tail call void @llvm.assume(i1 %0)
+ %1 = load ptr, ptr %vtable
+ %call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5
+ ret i32 %call
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) {
+entry:
+ %sub = sub nsw i32 %a, %b
+ ret i32 %sub
+}
+
+define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) {
+entry:
+ %add = add nsw i32 %b, %a
+ ret i32 %add
+}
+
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}
+!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700}
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 -4576307468236080025, i64 900, i64 1960855528937986108, i64 700}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 700, i32 0}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}
More information about the llvm-commits
mailing list