[compiler-rt] [llvm] [TypeProf][IndirectCallPromotion]Implement vtable-based transformation (PR #81442)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 11 22:30:34 PST 2024


https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/81442

* Cost-benefit analysis will be added in a subsequent patch.
* The parent patch is https://github.com/llvm/llvm-project/pull/81378

>From 48adcf1a142de6abeeb16596c5087fe83e8f422b Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 7 Feb 2024 15:12:36 -0800
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 compiler-rt/include/profile/InstrProfData.inc |  58 +-
 compiler-rt/lib/profile/InstrProfiling.h      |  35 +-
 .../lib/profile/InstrProfilingBuffer.c        |  58 +-
 .../lib/profile/InstrProfilingInternal.h      |   4 +-
 compiler-rt/lib/profile/InstrProfilingMerge.c |  25 +-
 .../lib/profile/InstrProfilingPlatformLinux.c |  20 +
 .../lib/profile/InstrProfilingWriter.c        |  37 +-
 .../llvm/Analysis/IndirectCallVisitor.h       |  70 +-
 llvm/include/llvm/ProfileData/InstrProf.h     | 170 ++++-
 .../llvm/ProfileData/InstrProfData.inc        |  40 +-
 .../llvm/ProfileData/InstrProfReader.h        |  20 +
 .../llvm/ProfileData/InstrProfWriter.h        |   4 +
 .../IndirectCallPromotionAnalysis.cpp         |   4 +
 llvm/lib/Analysis/ModuleSummaryAnalysis.cpp   |  20 +
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  13 +-
 llvm/lib/ProfileData/InstrProf.cpp            | 172 ++++-
 llvm/lib/ProfileData/InstrProfReader.cpp      |  72 ++-
 llvm/lib/ProfileData/InstrProfWriter.cpp      |  59 +-
 .../Instrumentation/IndirectCallPromotion.cpp |  45 +-
 .../Instrumentation/InstrProfiling.cpp        | 173 +++++
 .../Instrumentation/PGOInstrumentation.cpp    |   7 +
 .../Instrumentation/ValueProfilePlugins.inc   |  36 +-
 .../thinlto-func-summary-vtableref-pgo.ll     |  74 +++
 .../InstrProfiling/coverage.ll                |   8 +-
 .../thinlto_indirect_call_promotion.profraw   | Bin 528 -> 544 bytes
 .../PGOProfile/Inputs/vtable_prof.profraw     | Bin 0 -> 656 bytes
 .../Transforms/PGOProfile/comdat_internal.ll  |   4 +-
 .../Transforms/PGOProfile/vtable_profile.ll   |  98 +++
 .../llvm-profdata/Inputs/c-general.profraw    | Bin 2016 -> 2032 bytes
 .../llvm-profdata/Inputs/compressed.profraw   | Bin 1968 -> 1984 bytes
 .../Inputs/update_vtable_value_prof_inputs.sh | 102 +++
 .../Inputs/vtable-value-prof-basic.profraw    | Bin 0 -> 960 bytes
 .../Inputs/vtable-value-prof.proftext         |  73 +++
 .../llvm-profdata/binary-ids-padding.test     |   6 +-
 .../llvm-profdata/large-binary-id-size.test   |   4 +-
 ...alformed-not-space-for-another-header.test |   6 +-
 .../malformed-num-counters-zero.test          |   6 +-
 .../malformed-ptr-to-counter-array.test       |   6 +-
 .../misaligned-binary-ids-size.test           |   4 +-
 .../mismatched-raw-profile-header.test        |   2 +
 .../tools/llvm-profdata/raw-32-bits-be.test   |  11 +-
 .../tools/llvm-profdata/raw-32-bits-le.test   |  10 +-
 .../tools/llvm-profdata/raw-64-bits-be.test   |  10 +-
 .../tools/llvm-profdata/raw-64-bits-le.test   |  10 +-
 .../tools/llvm-profdata/raw-two-profiles.test |   8 +-
 .../vtable-value-prof-basic.test              | 124 ++++
 .../llvm-profdata/vtable-value-prof.proftext  |  16 +
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  30 +-
 llvm/unittests/ProfileData/InstrProfTest.cpp  | 604 ++++++++++++++----
 49 files changed, 2065 insertions(+), 293 deletions(-)
 create mode 100644 llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw
 create mode 100644 llvm/test/Transforms/PGOProfile/vtable_profile.ll
 create mode 100755 llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
 create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
 create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof.proftext

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 25df899b3f3619..f0bc2d960ce688 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -94,6 +94,26 @@ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \
 #undef INSTR_PROF_DATA
 /* INSTR_PROF_DATA end. */
 
+/* For a virtual table object, record the name hash to associate profiled
+ * addresses with global variables, and record {starting address, size in bytes}
+ * to map the profiled virtual table (which usually has an offset from the
+ * starting address) back to a virtual table object. */
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(
+    const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash,
+    ConstantInt::get(llvm::Type::getInt64Ty(Ctx),
+                     IndexedInstrProf::ComputeHash(PGOVTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx),
+                       VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize,
+                       ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
+                                        VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
 
 /* This is an internal data structure used by value profiler. It
  * is defined here to allow serialization code sharing by LLVM
@@ -145,6 +165,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
 INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta,
                       (uintptr_t)BitmapBegin - (uintptr_t)DataBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 #undef INSTR_PROF_RAW_HEADER
 /* INSTR_PROF_RAW_HEADER  end */
@@ -186,13 +208,28 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
 VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+/* For virtual table address profiling, the addresses of the virtual table
+ * (i.e., the address contained in objects pointing to a virtual table) are
+ * profiled. Note this may not be the address of the per C++ class virtual table
+ *  object (i.e., there is an offset).
+ *
+ * The profiled addresses are stored in raw profile, together with the following
+ * two types of information.
+ * 1. The (beginning and ending) addresses of per C++ class virtual table objects.
+ * 2. The (compressed) virtual table object names.
+ * RawInstrProfReader converts profiled virtual table addresses to virtual table
+ *  objects' MD5 hash.
+ */
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "The address of the compatible vtable (i.e., "
+                                      "there is an offset from this address to per C++ "
+                                      "class virtual table global variable.)")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
 VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -267,9 +304,9 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
 #undef COVMAP_HEADER
 /* COVMAP_HEADER end.  */
 
-
 #ifdef INSTR_PROF_SECT_ENTRY
 #define INSTR_PROF_DATA_DEFINED
+
 INSTR_PROF_SECT_ENTRY(IPSK_data, \
                       INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \
                       INSTR_PROF_DATA_COFF, "__DATA,")
@@ -282,12 +319,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \
 INSTR_PROF_SECT_ENTRY(IPSK_name, \
                       INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
                       INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+                      INSTR_PROF_VNAME_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vals, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
                       INSTR_PROF_VALS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
                       INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+                      INSTR_PROF_VTAB_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
                       INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
                       INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -307,7 +350,6 @@ INSTR_PROF_SECT_ENTRY(IPSK_covname, \
 #undef INSTR_PROF_SECT_ENTRY
 #endif
 
-
 #ifdef INSTR_PROF_VALUE_PROF_DATA
 #define INSTR_PROF_DATA_DEFINED
 
@@ -479,7 +521,6 @@ getValueProfRecordHeaderSize(uint32_t NumValueSites);
 #undef INSTR_PROF_VALUE_PROF_DATA
 #endif  /* INSTR_PROF_VALUE_PROF_DATA */
 
-
 #ifdef INSTR_PROF_COMMON_API_IMPL
 #define INSTR_PROF_DATA_DEFINED
 #ifdef __cplusplus
@@ -663,9 +704,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
         (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
 
 /* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 9
+#define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 11
+#define INSTR_PROF_INDEX_VERSION 12
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
@@ -703,10 +744,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_BITS_COMMON __llvm_prf_bits
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
 #define INSTR_PROF_COVMAP_COMMON __llvm_covmap
 #define INSTR_PROF_COVFUN_COMMON __llvm_covfun
 #define INSTR_PROF_COVDATA_COMMON __llvm_covdata
@@ -717,10 +760,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define INSTR_PROF_DATA_COFF ".lprfd$M"
 #define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfn$M"
 #define INSTR_PROF_CNTS_COFF ".lprfc$M"
 #define INSTR_PROF_BITS_COFF ".lprfb$M"
 #define INSTR_PROF_VALS_COFF ".lprfv$M"
 #define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
 #define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
 #define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
 /* Since cov data and cov names sections are not allocated, we don't need to
@@ -938,3 +983,4 @@ InstrProfIsSingleValRange(uint64_t Value) {
 }
 
 #endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */
+
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 01239083369187..9e6306ace61f2c 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -49,6 +49,12 @@ typedef struct ValueProfNode {
 #include "profile/InstrProfData.inc"
 } ValueProfNode;
 
+typedef void *IntPtrT;
+typedef struct VTableProfData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} VTableProfData;
+
 /*!
  * \brief Return 1 if profile counters are continuously synced to the raw
  * profile via an mmap(). This is in contrast to the default mode, in which
@@ -103,12 +109,16 @@ const __llvm_profile_data *__llvm_profile_begin_data(void);
 const __llvm_profile_data *__llvm_profile_end_data(void);
 const char *__llvm_profile_begin_names(void);
 const char *__llvm_profile_end_names(void);
+const char *__llvm_profile_begin_vtabnames(void);
+const char *__llvm_profile_end_vtabnames(void);
 char *__llvm_profile_begin_counters(void);
 char *__llvm_profile_end_counters(void);
 char *__llvm_profile_begin_bitmap(void);
 char *__llvm_profile_end_bitmap(void);
 ValueProfNode *__llvm_profile_begin_vnodes();
 ValueProfNode *__llvm_profile_end_vnodes();
+VTableProfData *__llvm_profile_begin_vtables();
+VTableProfData *__llvm_profile_end_vtables();
 uint32_t *__llvm_profile_begin_orderfile();
 
 /*!
@@ -252,20 +262,31 @@ uint64_t __llvm_profile_get_num_bitmap_bytes(const char *Begin,
 /*! \brief Get the size of the profile name section in bytes. */
 uint64_t __llvm_profile_get_name_size(const char *Begin, const char *End);
 
-/* ! \brief Given the sizes of the data and counter information, return the
- * number of padding bytes before and after the counters, and after the names,
- * in the raw profile.
+/*! \brief Get the number of virtual table profile data entries */
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+                                       const VTableProfData *End);
+
+/*! \brief Get the size of virtual table profile data in bytes. */
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+                                                const VTableProfData *End);
+
+/* ! \brief Given the sizes of the data and counter information, computes the
+ * number of padding bytes before and after the counter section, as well as the
+ * number of padding bytes after other sections in the raw profile.
+ * Returns -1 upon errors and 0 upon success. Output parameters should be used
+ * iff return value is 0.
  *
  * Note: When mmap() mode is disabled, no padding bytes before/after counters
  * are needed. However, in mmap() mode, the counter section in the raw profile
  * must be page-aligned: this API computes the number of padding bytes
  * needed to achieve that.
  */
-void __llvm_profile_get_padding_sizes_for_counters(
+int __llvm_profile_get_padding_sizes_for_counters(
     uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes,
-    uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters,
-    uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmap,
-    uint64_t *PaddingBytesAfterNames);
+    uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize,
+    uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
+    uint64_t *PaddingBytesAfterBitmap, uint64_t *PaddingBytesAfterNames,
+    uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVNames);
 
 /*!
  * \brief Set the flag that profile data has been dumped to the file.
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index af52804b2b532c..f31dc7d4e2111a 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -70,6 +70,18 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
                                       const __llvm_profile_data *End) {
   return __llvm_profile_get_num_data(Begin, End) * sizeof(__llvm_profile_data);
 }
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+                                       const VTableProfData *End) {
+  intptr_t EndI = (intptr_t)End, BeginI = (intptr_t)Begin;
+  return (EndI + sizeof(VTableProfData) - 1 - BeginI) / sizeof(VTableProfData);
+}
+
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+                                                const VTableProfData *End) {
+  return __llvm_profile_get_num_vtable(Begin, End) * sizeof(VTableProfData);
+}
 
 COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) {
   if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE)
@@ -119,11 +131,13 @@ static int needsCounterPadding(void) {
 }
 
 COMPILER_RT_VISIBILITY
-void __llvm_profile_get_padding_sizes_for_counters(
+int __llvm_profile_get_padding_sizes_for_counters(
     uint64_t DataSize, uint64_t CountersSize, uint64_t NumBitmapBytes,
-    uint64_t NamesSize, uint64_t *PaddingBytesBeforeCounters,
-    uint64_t *PaddingBytesAfterCounters, uint64_t *PaddingBytesAfterBitmapBytes,
-    uint64_t *PaddingBytesAfterNames) {
+    uint64_t NamesSize, uint64_t VTableSize, uint64_t VNameSize,
+    uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
+    uint64_t *PaddingBytesAfterBitmapBytes, uint64_t *PaddingBytesAfterNames,
+    uint64_t *PaddingBytesAfterVTable, uint64_t *PaddingBytesAfterVName) {
+  // Counter padding is needed only if continuous mode is enabled.
   if (!needsCounterPadding()) {
     *PaddingBytesBeforeCounters = 0;
     *PaddingBytesAfterCounters =
@@ -131,9 +145,19 @@ void __llvm_profile_get_padding_sizes_for_counters(
     *PaddingBytesAfterBitmapBytes =
         __llvm_profile_get_num_padding_bytes(NumBitmapBytes);
     *PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize);
-    return;
+    if (PaddingBytesAfterVTable != NULL)
+      *PaddingBytesAfterVTable =
+          __llvm_profile_get_num_padding_bytes(VTableSize);
+    if (PaddingBytesAfterVName != NULL)
+      *PaddingBytesAfterVName = __llvm_profile_get_num_padding_bytes(VNameSize);
+    return 0;
   }
 
+  // Value profiling not supported in continuous mode at profile-write time.
+  // Return -1 to alert the incompatibility.
+  if (VTableSize != 0 || VNameSize != 0)
+    return -1;
+
   // In continuous mode, the file offsets for headers and for the start of
   // counter sections need to be page-aligned.
   *PaddingBytesBeforeCounters =
@@ -142,6 +166,13 @@ void __llvm_profile_get_padding_sizes_for_counters(
   *PaddingBytesAfterBitmapBytes =
       calculateBytesNeededToPageAlign(NumBitmapBytes);
   *PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize);
+  // Set these two variables to zero to avoid uninitialized variables
+  // even if VTableSize and VNameSize are known to be zero.
+  if (PaddingBytesAfterVTable != NULL)
+    *PaddingBytesAfterVTable = 0;
+  if (PaddingBytesAfterVName != NULL)
+    *PaddingBytesAfterVName = 0;
+  return 0;
 }
 
 COMPILER_RT_VISIBILITY
@@ -162,9 +193,11 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
   uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
       PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes;
   __llvm_profile_get_padding_sizes_for_counters(
-      DataSize, CountersSize, NumBitmapBytes, NamesSize,
-      &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
-      &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames);
+      DataSize, CountersSize, NumBitmapBytes, NamesSize, 0 /* VTableSize */,
+      0 /* VNameSize */, &PaddingBytesBeforeCounters,
+      &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes,
+      &PaddingBytesAfterNames, NULL /* PaddingBytesAfterVTable */,
+      NULL /* PaddingbytesAfterVNames */);
 
   return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
          DataSize + PaddingBytesBeforeCounters + CountersSize +
@@ -191,7 +224,10 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
     const char *NamesBegin, const char *NamesEnd) {
   ProfDataWriter BufferWriter;
   initBufferWriter(&BufferWriter, Buffer);
-  return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin,
-                            CountersEnd, BitmapBegin, BitmapEnd, 0, NamesBegin,
-                            NamesEnd, 0);
+  // Set virtual table arguments to NULL since they are not supported yet.
+  return lprofWriteDataImpl(
+      &BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
+      BitmapBegin, BitmapEnd, 0 /* VPDataReader */, NamesBegin, NamesEnd,
+      NULL /* VTableBegin */, NULL /* VTableEnd */, NULL /* VNamesBegin */,
+      NULL /* VNamesEnd */, 0 /* SkipNameDataWrite */);
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index 03ed67fcfa766f..38159b668a1dfd 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -156,7 +156,9 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
                        const char *CountersBegin, const char *CountersEnd,
                        const char *BitmapBegin, const char *BitmapEnd,
                        VPDataReaderType *VPDataReader, const char *NamesBegin,
-                       const char *NamesEnd, int SkipNameDataWrite);
+                       const char *NamesEnd, const VTableProfData *VTableBegin,
+                       const VTableProfData *VTableEnd, const char *VNamesBegin,
+                       const char *VNamesEnd, int SkipNameDataWrite);
 
 /* Merge value profile data pointed to by SrcValueProfData into
  * in-memory profile counters pointed by to DstData.  */
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index b5850e99ee37d8..ad7a50dc77f44e 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -107,6 +107,27 @@ static uintptr_t signextIfWin64(void *V) {
 #endif
 }
 
+static uint64_t
+getDistanceFromCounterToValueProf(const __llvm_profile_header *const Header) {
+  // Skip names section, vtable profile data section and vtable names section
+  // for runtime profile merge. To merge runtime addresses from multiple
+  // profiles collected from the same instrumented binary, the binary should be
+  // loaded at fixed base address (e.g., build with -no-pie, or run with ASLR
+  // disabled).
+  // In this set-up these three sections remain unchanged.
+  const uint64_t VTableSectionSize =
+      Header->NumVTables * sizeof(VTableProfData);
+  const uint64_t PaddingBytesAfterVTableSection =
+      __llvm_profile_get_num_padding_bytes(VTableSectionSize);
+  const uint64_t VNamesSize = Header->VNamesSize;
+  const uint64_t PaddingBytesAfterVNamesSize =
+      __llvm_profile_get_num_padding_bytes(VNamesSize);
+  return Header->NamesSize +
+         __llvm_profile_get_num_padding_bytes(Header->NamesSize) +
+         VTableSectionSize + PaddingBytesAfterVTableSection + VNamesSize +
+         PaddingBytesAfterVNamesSize;
+}
+
 COMPILER_RT_VISIBILITY
 int __llvm_profile_merge_from_buffer(const char *ProfileData,
                                      uint64_t ProfileSize) {
@@ -136,9 +157,9 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
                    Header->NumCounters * __llvm_profile_counter_entry_size();
   SrcBitmapStart = SrcCountersEnd;
   SrcNameStart = SrcBitmapStart + Header->NumBitmapBytes;
+
   SrcValueProfDataStart =
-      SrcNameStart + Header->NamesSize +
-      __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+      SrcNameStart + getDistanceFromCounterToValueProf(Header);
   if (SrcNameStart < SrcCountersStart || SrcNameStart < SrcBitmapStart)
     return 1;
 
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 19266ab6c6fb8a..d2554a2702aaf6 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -24,8 +24,12 @@
 #define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
 #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
 #define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
+#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON)
+#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON)
 #define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
 #define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
+#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON)
+#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON)
 #define PROF_BITS_START INSTR_PROF_SECT_START(INSTR_PROF_BITS_COMMON)
 #define PROF_BITS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_BITS_COMMON)
 #define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
@@ -41,6 +45,10 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
     COMPILER_RT_WEAK;
 extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_BITS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_BITS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
@@ -63,6 +71,18 @@ COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
 COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
   return &PROF_NAME_STOP;
 }
+COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vtabnames(void) {
+  return &PROF_VNAME_START;
+}
+COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vtabnames(void) {
+  return &PROF_VNAME_STOP;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_begin_vtables(void) {
+  return &PROF_VTABLE_START;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_end_vtables(void) {
+  return &PROF_VTABLE_STOP;
+}
 COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) {
   return &PROF_CNTS_START;
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 4d767d13851485..8816a71155511b 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -250,9 +250,14 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
   const char *BitmapEnd = __llvm_profile_end_bitmap();
   const char *NamesBegin = __llvm_profile_begin_names();
   const char *NamesEnd = __llvm_profile_end_names();
+  const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
+  const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
+  const char *VNamesBegin = __llvm_profile_begin_vtabnames();
+  const char *VNamesEnd = __llvm_profile_end_vtabnames();
   return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
                             CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
-                            NamesBegin, NamesEnd, SkipNameDataWrite);
+                            NamesBegin, NamesEnd, VTableBegin, VTableEnd,
+                            VNamesBegin, VNamesEnd, SkipNameDataWrite);
 }
 
 COMPILER_RT_VISIBILITY int
@@ -261,7 +266,9 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
                    const char *CountersBegin, const char *CountersEnd,
                    const char *BitmapBegin, const char *BitmapEnd,
                    VPDataReaderType *VPDataReader, const char *NamesBegin,
-                   const char *NamesEnd, int SkipNameDataWrite) {
+                   const char *NamesEnd, const VTableProfData *VTableBegin,
+                   const VTableProfData *VTableEnd, const char *VNamesBegin,
+                   const char *VNamesEnd, int SkipNameDataWrite) {
   /* Calculate size of sections. */
   const uint64_t DataSectionSize =
       __llvm_profile_get_data_size(DataBegin, DataEnd);
@@ -273,6 +280,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   const uint64_t NumBitmapBytes =
       __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd);
   const uint64_t NamesSize = __llvm_profile_get_name_size(NamesBegin, NamesEnd);
+  const uint64_t NumVTables =
+      __llvm_profile_get_num_vtable(VTableBegin, VTableEnd);
+  const uint64_t VTableSectionSize =
+      __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd);
+  const uint64_t VNamesSize =
+      __llvm_profile_get_name_size(VNamesBegin, VNamesEnd);
 
   /* Create the header. */
   __llvm_profile_header Header;
@@ -280,11 +293,15 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   /* Determine how much padding is needed before/after the counters and after
    * the names. */
   uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
-      PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes;
-  __llvm_profile_get_padding_sizes_for_counters(
-      DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize,
-      &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
-      &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames);
+      PaddingBytesAfterBitmapBytes, PaddingBytesAfterNames,
+      PaddingBytesAfterVTable, PaddingBytesAfterVNames;
+  if (__llvm_profile_get_padding_sizes_for_counters(
+          DataSectionSize, CountersSectionSize, NumBitmapBytes, NamesSize,
+          VTableSectionSize, VNamesSize, &PaddingBytesBeforeCounters,
+          &PaddingBytesAfterCounters, &PaddingBytesAfterBitmapBytes,
+          &PaddingBytesAfterNames, &PaddingBytesAfterVTable,
+          &PaddingBytesAfterVNames) == -1)
+    return -1;
 
   {
 /* Initialize header structure.  */
@@ -323,7 +340,11 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
       {BitmapBegin, sizeof(uint8_t), NumBitmapBytes, 0},
       {NULL, sizeof(uint8_t), PaddingBytesAfterBitmapBytes, 1},
       {SkipNameDataWrite ? NULL : NamesBegin, sizeof(uint8_t), NamesSize, 0},
-      {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}};
+      {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1},
+      {VTableBegin, sizeof(uint8_t), VTableSectionSize, 0},
+      {NULL, sizeof(uint8_t), PaddingBytesAfterVTable, 1},
+      {SkipNameDataWrite ? NULL : VNamesBegin, sizeof(uint8_t), VNamesSize, 0},
+      {NULL, sizeof(uint8_t), PaddingBytesAfterVNames, 1}};
   if (Writer->Write(Writer, IOVecData, sizeof(IOVecData) / sizeof(*IOVecData)))
     return -1;
 
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 0825e19ecd2d24..5969241a179ea1 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -12,27 +12,87 @@
 #ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
 #define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/IR/InstVisitor.h"
 #include <vector>
 
 namespace llvm {
-// Visitor class that finds all indirect call.
+// Visitor class that finds indirect calls or instructions that give vtable
+// values, depending on Type.
 struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
+  enum class InstructionType {
+    kIndirectCall = 0,
+    kVTableVal = 1,
+  };
   std::vector<CallBase *> IndirectCalls;
-  PGOIndirectCallVisitor() = default;
+  std::vector<Instruction *> ProfiledAddresses;
+  PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {}
+
+  // Given an indirect call instruction, try to find the following pattern
+  //
+  // %vtable = load ptr, ptr %obj
+  // %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  // %2 = load ptr, ptr %vfn
+  // %call = tail call i32 %2
+  //
+  // A heuristic is used to find the address feeding instructions.
+  static Instruction *tryGetVTableInstruction(CallBase *CB) {
+    assert(CB != nullptr && "Caller guaranteed");
+    LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
+
+    if (LI != nullptr) {
+      Value *FuncPtr = LI->getPointerOperand(); // GEP (or bitcast)
+      Value *VTablePtr = FuncPtr->stripInBoundsConstantOffsets();
+      // FIXME: Add support in the frontend so LLVM type intrinsics are
+      // emitted without LTO. This way, added intrinsics could filter
+      // non-vtable instructions and reduce instrumentation overhead.
+      // Since a non-vtable profiled address is not within the address
+      // range of vtable objects, it's stored as zero in indexed profiles.
+      // A pass that looks up a symbol with a zero hash will (almost) always
+      // find nullptr and skip the actual transformation (e.g., comparison
+      // of symbols). So the performance overhead from non-vtable profiled
+      // addresses is negligible, if it exists at all. Comparing the loaded
+      // address with the symbol address guarantees correctness.
+      if (VTablePtr != nullptr && isa<Instruction>(VTablePtr)) {
+        return cast<Instruction>(VTablePtr);
+      }
+    }
+    return nullptr;
+  }
 
   void visitCallBase(CallBase &Call) {
-    if (Call.isIndirectCall())
+    if (Call.isIndirectCall()) {
       IndirectCalls.push_back(&Call);
+
+      if (Type != InstructionType::kVTableVal)
+        return;
+
+      Instruction *VPtr =
+          PGOIndirectCallVisitor::tryGetVTableInstruction(&Call);
+      if (VPtr) {
+        ProfiledAddresses.push_back(VPtr);
+      }
+    }
   }
+
+private:
+  InstructionType Type;
 };
 
-// Helper function that finds all indirect call sites.
 inline std::vector<CallBase *> findIndirectCalls(Function &F) {
-  PGOIndirectCallVisitor ICV;
+  PGOIndirectCallVisitor ICV(
+      PGOIndirectCallVisitor::InstructionType::kIndirectCall);
   ICV.visit(F);
   return ICV.IndirectCalls;
 }
+
+inline std::vector<Instruction *> findVTableAddrs(Function &F) {
+  PGOIndirectCallVisitor ICV(
+      PGOIndirectCallVisitor::InstructionType::kVTableVal);
+  ICV.visit(F);
+  return ICV.ProfiledAddresses;
+}
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 87e7bbbd727ee5..6cdceae5eeb960 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -89,6 +89,9 @@ inline StringRef getInstrProfValueProfMemOpFuncName() {
 /// Return the name prefix of variables containing instrumented function names.
 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
 
+/// Return the name prefix of variables containing virtual table profile data.
+inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; }
+
 /// Return the name prefix of variables containing per-function control data.
 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
 
@@ -110,6 +113,8 @@ inline StringRef getInstrProfNamesVarName() {
   return "__llvm_prf_nm";
 }
 
+inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; }
+
 /// Return the name of a covarage mapping variable (internal linkage)
 /// for each instrumented source module. Such variables are allocated
 /// in the __llvm_covmap section.
@@ -246,6 +251,9 @@ Error collectGlobalObjectNameStrings(ArrayRef<std::string> NameStrs,
 Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                 std::string &Result, bool doCompression = true);
 
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+                           std::string &Result, bool doCompression);
+
 /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
 /// set in IR PGO compilation.
 bool isIRPGOFlagSet(const Module *M);
@@ -269,13 +277,15 @@ void annotateValueSite(Module &M, Instruction &Inst,
                        uint32_t MaxMDCount = 3);
 
 /// Same as the above interface but using an ArrayRef, as well as \p Sum.
+/// This function will not annotate !prof metadata on the instruction if the
+/// referenced array is empty.
 void annotateValueSite(Module &M, Instruction &Inst,
                        ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                        InstrProfValueKind ValueKind, uint32_t MaxMDCount);
 
 /// Extract the value profile data from \p Inst which is annotated with
 /// value profile meta data. Return false if there is no value data annotated,
-/// otherwise  return true.
+/// otherwise return true.
 bool getValueProfDataFromInst(const Instruction &Inst,
                               InstrProfValueKind ValueKind,
                               uint32_t MaxNumValueData,
@@ -283,11 +293,23 @@ bool getValueProfDataFromInst(const Instruction &Inst,
                               uint32_t &ActualNumValueData, uint64_t &TotalC,
                               bool GetNoICPValue = false);
 
+/// Extract the value profile data from \p Inst and returns them if \p Inst is
+/// annotated with value profile data. Returns nullptr otherwise. It's similar
+/// to `getValueProfDataFromInst` above except that an array is allocated only
+/// after a preliminary checking that the value profiles of kind `ValueKind`
+/// exist.
+std::unique_ptr<InstrProfValueData[]>
+getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
+                         uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
+                         uint64_t &TotalC, bool GetNoICPValue = false);
+
 inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
 
 /// Return the PGOFuncName meta data associated with a function.
 MDNode *getPGOFuncNameMetadata(const Function &F);
 
+std::string getPGOName(const GlobalVariable &V, bool InLTO = false);
+
 /// Create the PGOFuncName meta data if PGOFuncName is different from
 /// function's raw name. This should only apply to internal linkage functions
 /// declared by users only.
@@ -295,7 +317,7 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
 
 /// Check if we can use Comdat for profile variables. This will eliminate
 /// the duplicated profile variables for Comdat functions.
-bool needsComdatForCounter(const Function &F, const Module &M);
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M);
 
 /// An enum describing the attributes of an instrumented profile.
 enum class InstrProfKind {
@@ -429,20 +451,36 @@ uint64_t ComputeHash(StringRef K);
 class InstrProfSymtab {
 public:
   using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
+  using RangeHashMap =
+      std::vector<std::pair<std::pair<uint64_t, uint64_t>, uint64_t>>;
 
 private:
   StringRef Data;
   uint64_t Address = 0;
-  // Unique name strings.
+  // Unique name strings. Used to ensure entries in MD5NameMap (a vector that's
+  // going to be sorted) have unique MD5 keys in the first place.
   StringSet<> NameTab;
+  // Records the unique virtual table names. This is used by InstrProfWriter to
+  // write out an on-disk chained hash table of virtual table names.
+  // InstrProfWriter stores per function profile data (keyed by function names)
+  // so it doesn't use a StringSet for function names.
+  StringSet<> VTableNames;
   // A map from MD5 keys to function name strings.
   std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
+  // A map from MD5 keys to virtual table definitions. Only populated when
+  // building the Symtab from a module.
+  std::vector<std::pair<uint64_t, GlobalVariable *>> MD5VTableMap;
   // A map from MD5 keys to function define. We only populate this map
   // when build the Symtab from a Module.
   std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
   // A map from function runtime address to function name MD5 hash.
   // This map is only populated and used by raw instr profile reader.
   AddrHashMap AddrToMD5Map;
+  // A map from virtual table runtime address range to vtable name MD5 hash.
+  // This map is only populated and used by raw instr profile reader.
+  // This is a different map from 'AddrToMD5Map' for readability and
+  // debuggability.
+  RangeHashMap VTableAddrRangeToMD5Map;
   bool Sorted = false;
 
   static StringRef getExternalSymbol() {
@@ -470,9 +508,19 @@ class InstrProfSymtab {
 
   /// \c NameStrings is a string composed of one of more sub-strings
   ///  encoded in the format described in \c collectPGOFuncNameStrings.
-  /// This method is a wrapper to \c readPGOFuncNameStrings method.
+  /// This method is a wrapper to \c readAndDecodeStrings method.
   Error create(StringRef NameStrings);
 
+  /// \c FuncNameStrings is a string composed of one or more encoded function
+  /// name strings, and \c VTableNameStrings is composed of one or more encoded
+  /// vtable names. This function is a wrapper to \c readAndDecodeStrings
+  /// method.
+  Error create(StringRef FuncNameStrings, StringRef VTableNameStrings);
+
+  /// Initialize 'this' with the set of vtable names encoded in
+  /// \c CompressedVTableNames.
+  Error initVTableNamesFromCompressedStrings(StringRef CompressedVTableNames);
+
   /// This interface is used by reader of CoverageMapping test
   /// format.
   inline Error create(StringRef D, uint64_t BaseAddr);
@@ -485,32 +533,70 @@ class InstrProfSymtab {
 
   /// Create InstrProfSymtab from a set of names iteratable from
   /// \p IterRange. This interface is used by IndexedProfReader.
-  template <typename NameIterRange> Error create(const NameIterRange &IterRange);
-
-  /// Update the symtab by adding \p FuncName to the table. This interface
-  /// is used by the raw and text profile readers.
-  Error addFuncName(StringRef FuncName) {
-    if (FuncName.empty())
+  template <typename NameIterRange>
+  Error create(const NameIterRange &IterRange);
+
+  /// Create InstrProfSymtab from a set of function names iterable from
+  /// \p FuncIterRange and vtable names iterable from \p VTableIterRange.
+  /// This interface is used by IndexedProfReader.
+  template <typename FuncNameIterRange, typename VTableNameIterRange>
+  Error create(const FuncNameIterRange &FuncIterRange,
+               const VTableNameIterRange &VTableIterRange);
+
+  Error addSymbolName(StringRef SymbolName) {
+    if (SymbolName.empty())
       return make_error<InstrProfError>(instrprof_error::malformed,
-                                        "function name is empty");
-    auto Ins = NameTab.insert(FuncName);
+                                        "symbol name is empty");
+
+    // Insert into NameTab so that MD5NameMap (a vector that will be sorted)
+    // won't have duplicated entries in the first place.
+    auto Ins = NameTab.insert(SymbolName);
     if (Ins.second) {
       MD5NameMap.push_back(std::make_pair(
-          IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
+          IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey()));
       Sorted = false;
     }
     return Error::success();
   }
 
+  /// The method name is kept since there are many callers.
+  /// It just forwards to 'addSymbolName'.
+  Error addFuncName(StringRef FuncName) { return addSymbolName(FuncName); }
+
+  /// Adds VTableName as a known symbol, and inserts it into a set that
+  /// tracks all vtable names.
+  Error addVTableName(StringRef VTableName) {
+    if (Error E = addSymbolName(VTableName))
+      return E;
+
+    // Record VTableName. InstrProfWriter uses this set. The comment on the
+    // VTableNames class member explains why.
+    VTableNames.insert(VTableName);
+    return Error::success();
+  }
+
+  const StringSet<> &getVTableNames() const { return VTableNames; }
+
   /// Map a function address to its name's MD5 hash. This interface
   /// is only used by the raw profiler reader.
   void mapAddress(uint64_t Addr, uint64_t MD5Val) {
     AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
   }
 
+  /// Map the address range (i.e., [start_address, end_address]) of a variable
+  /// to its name's MD5 hash. This interface is only used by the raw profile
+  /// reader.
+  void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) {
+    VTableAddrRangeToMD5Map.push_back(
+        std::make_pair(std::make_pair(StartAddr, EndAddr), MD5Val));
+  }
+
   /// Return a function's hash, or 0, if the function isn't in this SymTab.
   uint64_t getFunctionHashFromAddress(uint64_t Address);
 
+  /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab.
+  uint64_t getVTableHashFromAddress(uint64_t Address);
+
   /// Return function's PGO name from the function name's symbol
   /// address in the object file. If an error occurs, return
   /// an empty string.
@@ -532,6 +618,8 @@ class InstrProfSymtab {
 
   /// Return function from the name's md5 hash. Return nullptr if not found.
   inline Function *getFunction(uint64_t FuncMD5Hash);
+  /// Return vtable from the name's MD5 hash. Return nullptr if not found.
+  inline GlobalVariable *getGlobalVariable(uint64_t GlobalVariableMD5Hash);
 
   /// Return the name section data.
   inline StringRef getNameData() const { return Data; }
@@ -556,6 +644,23 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) {
   return Error::success();
 }
 
+template <typename FuncNameIterRange, typename VTableNameIterRange>
+Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange,
+                              const VTableNameIterRange &VTableIterRange) {
+  for (auto Name : FuncIterRange)
+    if (Error E = addFuncName(Name))
+      return E;
+
+  for (auto VTableName : VTableIterRange) {
+    if (Error E = addVTableName(VTableName)) {
+      return E;
+    }
+  }
+
+  finalizeSymtab();
+  return Error::success();
+}
+
 void InstrProfSymtab::finalizeSymtab() {
   if (Sorted)
     return;
@@ -564,6 +669,13 @@ void InstrProfSymtab::finalizeSymtab() {
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
                      AddrToMD5Map.end());
+  // VTable object address ranges should not overlap, so sorting by either
+  // the beginning address or the end address is fine.
+  llvm::sort(VTableAddrRangeToMD5Map, less_first());
+  // std::unique uses == operator for std::pair.
+  VTableAddrRangeToMD5Map.erase(std::unique(VTableAddrRangeToMD5Map.begin(),
+                                            VTableAddrRangeToMD5Map.end()),
+                                VTableAddrRangeToMD5Map.end());
   Sorted = true;
 }
 
@@ -594,6 +706,19 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
   return nullptr;
 }
 
+GlobalVariable *
+InstrProfSymtab::getGlobalVariable(uint64_t GlobalVariableMD5Hash) {
+  finalizeSymtab();
+  auto Result =
+      llvm::lower_bound(MD5VTableMap, GlobalVariableMD5Hash,
+                        [](const std::pair<uint64_t, GlobalVariable *> &LHS,
+                           uint64_t RHS) { return LHS.first < RHS; });
+
+  if (Result != MD5VTableMap.end() && Result->first == GlobalVariableMD5Hash)
+    return Result->second;
+  return nullptr;
+}
+
 // To store the sums of profile count values, or the percentage of
 // the sums of the total count values.
 struct CountSumOrPercent {
@@ -820,6 +945,7 @@ struct InstrProfRecord {
   struct ValueProfData {
     std::vector<InstrProfValueSiteRecord> IndirectCallSites;
     std::vector<InstrProfValueSiteRecord> MemOPSizes;
+    std::vector<InstrProfValueSiteRecord> VTableTargets;
   };
   std::unique_ptr<ValueProfData> ValueData;
 
@@ -842,6 +968,8 @@ struct InstrProfRecord {
       return ValueData->IndirectCallSites;
     case IPVK_MemOPSize:
       return ValueData->MemOPSizes;
+    case IPVK_VTableTarget:
+      return ValueData->VTableTargets;
     default:
       llvm_unreachable("Unknown value kind!");
     }
@@ -856,6 +984,8 @@ struct InstrProfRecord {
       return ValueData->IndirectCallSites;
     case IPVK_MemOPSize:
       return ValueData->MemOPSizes;
+    case IPVK_VTableTarget:
+      return ValueData->VTableTargets;
     default:
       llvm_unreachable("Unknown value kind!");
     }
@@ -1025,7 +1155,9 @@ enum ProfVersion {
   Version10 = 10,
   // An additional field is used for bitmap bytes.
   Version11 = 11,
-  // The current version is 11.
+  // VTable profiling.
+  Version12 = 12,
+  // The current version is 12.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
 const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1046,6 +1178,7 @@ struct Header {
   uint64_t MemProfOffset;
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
+  uint64_t VTableNamesOffset; // Organize virtual table names.
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
@@ -1182,8 +1315,13 @@ template <> inline uint64_t getMagic<uint32_t>() {
 // It should also match the synthesized type in
 // Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
 template <class IntPtrT> struct alignas(8) ProfileData {
-  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
-  #include "llvm/ProfileData/InstrProfData.inc"
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+template <class IntPtrT> struct alignas(8) VTableProfileData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
 };
 
 // File header structure of the LLVM profile data in raw format.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 25df899b3f3619..77720aba3eb484 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -94,6 +94,22 @@ INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumBitmapBytes, \
 #undef INSTR_PROF_DATA
 /* INSTR_PROF_DATA end. */
 
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(
+    const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash,
+    ConstantInt::get(llvm::Type::getInt64Ty(Ctx),
+                     IndexedInstrProf::ComputeHash(PGOVTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::PointerType::getUnqual(Ctx),
+                       VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize,
+                       ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
+                                        VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
 
 /* This is an internal data structure used by value profiler. It
  * is defined here to allow serialization code sharing by LLVM
@@ -145,6 +161,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
 INSTR_PROF_RAW_HEADER(uint64_t, BitmapDelta,
                       (uintptr_t)BitmapBegin - (uintptr_t)DataBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 #undef INSTR_PROF_RAW_HEADER
 /* INSTR_PROF_RAW_HEADER  end */
@@ -186,13 +204,14 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
 VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
 VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -267,7 +286,6 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
 #undef COVMAP_HEADER
 /* COVMAP_HEADER end.  */
 
-
 #ifdef INSTR_PROF_SECT_ENTRY
 #define INSTR_PROF_DATA_DEFINED
 INSTR_PROF_SECT_ENTRY(IPSK_data, \
@@ -282,12 +300,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \
 INSTR_PROF_SECT_ENTRY(IPSK_name, \
                       INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
                       INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+                      INSTR_PROF_VNAME_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vals, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
                       INSTR_PROF_VALS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
                       INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+                      INSTR_PROF_VTAB_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
                       INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
                       INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -307,7 +331,6 @@ INSTR_PROF_SECT_ENTRY(IPSK_covname, \
 #undef INSTR_PROF_SECT_ENTRY
 #endif
 
-
 #ifdef INSTR_PROF_VALUE_PROF_DATA
 #define INSTR_PROF_DATA_DEFINED
 
@@ -347,7 +370,7 @@ typedef struct ValueProfRecord {
   /*!
    * Return the number of value sites.
    */
-  uint32_t getNumValueSites() const { return NumValueSites; }
+  uint32_t getNumValueSites() const {  return NumValueSites; }
   /*!
    * Read data from this record and save it to Record.
    */
@@ -479,7 +502,6 @@ getValueProfRecordHeaderSize(uint32_t NumValueSites);
 #undef INSTR_PROF_VALUE_PROF_DATA
 #endif  /* INSTR_PROF_VALUE_PROF_DATA */
 
-
 #ifdef INSTR_PROF_COMMON_API_IMPL
 #define INSTR_PROF_DATA_DEFINED
 #ifdef __cplusplus
@@ -663,9 +685,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
         (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
 
 /* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 9
+#define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 11
+#define INSTR_PROF_INDEX_VERSION 12
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
@@ -703,10 +725,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_BITS_COMMON __llvm_prf_bits
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
 #define INSTR_PROF_COVMAP_COMMON __llvm_covmap
 #define INSTR_PROF_COVFUN_COMMON __llvm_covfun
 #define INSTR_PROF_COVDATA_COMMON __llvm_covdata
@@ -717,10 +741,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define INSTR_PROF_DATA_COFF ".lprfd$M"
 #define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfvn$M"
 #define INSTR_PROF_CNTS_COFF ".lprfc$M"
 #define INSTR_PROF_BITS_COFF ".lprfb$M"
 #define INSTR_PROF_VALS_COFF ".lprfv$M"
 #define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
 #define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
 #define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
 /* Since cov data and cov names sections are not allocated, we don't need to
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 87f15639a2c3c9..c1edd7afb75bd7 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -326,12 +326,16 @@ class RawInstrProfReader : public InstrProfReader {
   uint64_t NamesDelta;
   const RawInstrProf::ProfileData<IntPtrT> *Data;
   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
+  const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
+  const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
   const char *CountersStart;
   const char *CountersEnd;
   const char *BitmapStart;
   const char *BitmapEnd;
   const char *NamesStart;
   const char *NamesEnd;
+  const char *VNamesStart = nullptr;
+  const char *VNamesEnd = nullptr;
   // After value profile is all read, this pointer points to
   // the header of next profile data (if exists)
   const uint8_t *ValueDataStart;
@@ -622,6 +626,12 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
   InstrProfKind getProfileKind() const override;
 
   Error populateSymtab(InstrProfSymtab &Symtab) override {
+    // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of
+    // arrays/maps. Since there are other data sources other than 'HashTable' to
+    // populate a symtab, it might make sense to have something like this
+    // 1. Let each data source populate Symtab and init the arrays/maps without
+    // calling 'finalizeSymtab'
+    // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed.
     return Symtab.create(HashTable->keys());
   }
 };
@@ -656,6 +666,16 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
   /// MemProf frame profile data on-disk indexed via frame id.
   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+  /// The reader itself doesn't decompress vtable names. A compiler that reads
+  /// indexed profiles could construct symtab from module IR so it doesn't need
+  /// the decompressed names.
+  /// When a symtab is constructed from profiles by llvm-profdata, the list of
+  /// names could be decompressed based on `VTableNamePtr` and
+  /// `CompressedVTableNamesLen`.
+  /// VTableNamePtr points to the beginning of compressed vtable names.
+  const char *VTableNamePtr = nullptr;
+  /// The length of compressed vtable names.
+  uint64_t CompressedVTableNamesLen = 0;
   /// Total size of binary ids.
   uint64_t BinaryIdsSize{0};
   /// Start address of binary id length and data pairs.
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 047b14f223bd94..049fa36bb53f5c 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -63,6 +63,9 @@ class InstrProfWriter {
   // List of binary ids.
   std::vector<llvm::object::BuildID> BinaryIds;
 
+  // The vtable names read from the raw instr profile reader.
+  StringSet<> VTableNames;
+
   // An enum describing the attributes of the profile.
   InstrProfKind ProfileKind = InstrProfKind::Unknown;
   // Use raw pointer here for the incomplete type object.
@@ -84,6 +87,7 @@ class InstrProfWriter {
   void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) {
     addRecord(std::move(I), 1, Warn);
   }
+  void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); }
 
   /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the
   /// total number of temporal profiling traces the source has seen.
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ebfa1c8fc08e1c..ab53717eb889a0 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -45,6 +45,10 @@ static cl::opt<unsigned>
                      cl::desc("Max number of promotions for a single indirect "
                               "call callsite"));
 
+cl::opt<unsigned> MaxNumVTableAnnotations(
+    "icp-max-num-vtables", cl::init(6), cl::Hidden,
+    cl::desc("Max number of vtables annotated for a vtable load instruction."));
+
 ICallPromotionAnalysis::ICallPromotionAnalysis() {
   ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
 }
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 1f15e94783240a..3ad0bab827a512 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -82,6 +82,8 @@ static cl::opt<std::string> ModuleSummaryDotFile(
 
 extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
 
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
 // Walk through the operands of a given User via worklist iteration and populate
 // the set of GlobalValue references encountered. Invoked either on an
 // Instruction or a GlobalVariable (which walks its initializer).
@@ -124,6 +126,24 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
         Worklist.push_back(Operand);
     }
   }
+
+  const Instruction *I = dyn_cast<Instruction>(CurUser);
+  if (I) {
+    uint32_t ActualNumValueData = 0;
+    uint64_t TotalCount = 0;
+    // MaxNumVTableAnnotations is the maximum number of vtables annotated on
+    // the instruction.
+    auto ValueDataArray =
+        getValueProfDataFromInst(*I, IPVK_VTableTarget, MaxNumVTableAnnotations,
+                                 ActualNumValueData, TotalCount);
+
+    if (ValueDataArray.get()) {
+      for (uint32_t j = 0; j < ActualNumValueData; j++) {
+        RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */
+                                                   ValueDataArray[j].Value));
+      }
+    }
+  }
   return HasBlockAddress;
 }
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 13be0b0c3307fb..7686e32b69305b 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -199,7 +199,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
     for (const auto &GUIDSummaryLists : *Index)
       // Examine all summaries for this GUID.
       for (auto &Summary : GUIDSummaryLists.second.SummaryList)
-        if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+        if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) {
           // For each call in the function summary, see if the call
           // is to a GUID (which means it is for an indirect call,
           // otherwise we would have a Value for it). If so, synthesize
@@ -207,6 +207,15 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
           for (auto &CallEdge : FS->calls())
             if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue())
               assignValueId(CallEdge.first.getGUID());
+
+          // For each variable referenced in the function summary, see if
+          // the variable is represented only by a GUID (as opposed to a
+          // declaration or definition in the module). If so, synthesize a
+          // value id.
+          for (auto &RefEdge : FS->refs())
+            if ((!RefEdge.haveGVs() || !RefEdge.getValue()))
+              assignValueId(RefEdge.getGUID());
+        }
   }
 
 protected:
@@ -4071,7 +4080,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
   NameVals.push_back(SpecialRefCnts.second); // worefcnt
 
   for (auto &RI : FS->refs())
-    NameVals.push_back(VE.getValueID(RI.getValue()));
+    NameVals.push_back(getValueId(RI));
 
   const bool UseRelBFRecord =
       WriteRelBFToSummary && !F.hasProfileData() &&
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 2640027455e0da..91e79e8b2e9add 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -219,6 +219,12 @@ cl::opt<bool> DoInstrProfNameCompression(
     "enable-name-compression",
     cl::desc("Enable name/filename string compression"), cl::init(true));
 
+cl::opt<bool> EnableVTableValueProfiling(
+    "enable-vtable-value-profiling", cl::init(false),
+    cl::desc("If true, the virtual table address will be instrumented to know "
+             "the types of a C++ pointer. The information is used in indirect "
+             "call promotion to do selective vtable-based comparison."));
+
 std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                     Triple::ObjectFormatType OF,
                                     bool AddSegmentInfo) {
@@ -378,6 +384,13 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
   return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
 }
 
+std::string getPGOName(const GlobalVariable &V, bool InLTO) {
+  // PGONameMetadata is expected to be set by the compiler at profile-use
+  // time and read during symtab creation to look up the symbol that
+  // corresponds to an MD5 hash; nullptr is passed for it here.
+  return getIRPGOObjectName(V, InLTO, nullptr /* PGONameMetadata */);
+}
+
 // See getIRPGOFuncName() for a discription of the format.
 std::pair<StringRef, StringRef>
 getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
@@ -460,6 +473,17 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
       return E;
   }
+
+  SmallVector<MDNode *, 2> Types;
+  for (GlobalVariable &G : M.globals()) {
+    if (!G.hasName())
+      continue;
+    Types.clear();
+    G.getMetadata(LLVMContext::MD_type, Types);
+    if (!Types.empty()) {
+      MD5VTableMap.emplace_back(G.getGUID(), &G);
+    }
+  }
   Sorted = false;
   finalizeSymtab();
   return Error::success();
@@ -518,6 +542,25 @@ Error InstrProfSymtab::create(StringRef NameStrings) {
       std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1));
 }
 
+Error InstrProfSymtab::create(StringRef FuncNameStrings,
+                              StringRef VTableNameStrings) {
+  // Decode the function names first and stop at the first error.
+  auto AddFunc = [this](StringRef Name) { return addFuncName(Name); };
+  if (Error E = readAndDecodeStrings(FuncNameStrings, AddFunc))
+    return E;
+
+  // Then decode the vtable names with addVTableName as the callback.
+  auto AddVTable = [this](StringRef Name) { return addVTableName(Name); };
+  return readAndDecodeStrings(VTableNameStrings, AddVTable);
+}
+
+Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
+    StringRef CompressedVTableStrings) {
+  // Decode the strings and hand every decoded name to addVTableName.
+  auto AddVTable = [this](StringRef Name) { return addVTableName(Name); };
+  return readAndDecodeStrings(CompressedVTableStrings, AddVTable);
+}
+
 Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
   if (Error E = addFuncName(PGOFuncName))
     return E;
@@ -550,6 +593,28 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
   return Error::success();
 }
 
+uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
+  finalizeSymtab();
+  // Each map entry is ((start-address, end-address), MD5 hash). Locate the
+  // first entry whose end address is larger than `Address`. The predicate
+  // uses less-than-or-equal-to because a profiled address within a vtable
+  // lies in the half-open range [start-address, end-address).
+  auto It = partition_point(
+      VTableAddrRangeToMD5Map,
+      [=](std::pair<std::pair<uint64_t, uint64_t>, uint64_t> Entry) {
+        return Entry.first.second <= Address;
+      });
+
+  // The virtual table address collected from value profiler could be defined
+  // in another module that is not instrumented. Force the value to be 0 in
+  // this case.
+  if (It == VTableAddrRangeToMD5Map.end() || Address < It->first.first)
+    return 0;
+
+  // `Address` is within the address range of this entry.
+  return It->second;
+}
+
 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
   finalizeSymtab();
   auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
@@ -626,6 +691,17 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
       NameStrs, compression::zlib::isAvailable() && doCompression, Result);
 }
 
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+                           std::string &Result, bool doCompression) {
+  // Collect the PGO name of each vtable variable and pass the list on to
+  // collectGlobalObjectNameStrings together with the compression flag.
+  std::vector<std::string> PGONames;
+  for (GlobalVariable *VT : VTables)
+    PGONames.push_back(getPGOName(*VT));
+  const bool Compress = compression::zlib::isAvailable() && doCompression;
+  return collectGlobalObjectNameStrings(PGONames, Compress, Result);
+}
+
 void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
   uint64_t FuncSum = 0;
   Sum.NumEntries += Counts.size();
@@ -888,6 +964,9 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
   if (ValueKind == IPVK_IndirectCallTarget)
     return SymTab->getFunctionHashFromAddress(Value);
 
+  if (ValueKind == IPVK_VTableTarget)
+    return SymTab->getVTableHashFromAddress(Value);
+
   return Value;
 }
 
@@ -1181,6 +1260,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
                        ArrayRef<InstrProfValueData> VDs,
                        uint64_t Sum, InstrProfValueKind ValueKind,
                        uint32_t MaxMDCount) {
+  if (VDs.empty())
+    return;
   LLVMContext &Ctx = M.getContext();
   MDBuilder MDHelper(Ctx);
   SmallVector<Metadata *, 3> Vals;
@@ -1206,46 +1287,44 @@ void annotateValueSite(Module &M, Instruction &Inst,
   Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals));
 }
 
-bool getValueProfDataFromInst(const Instruction &Inst,
-                              InstrProfValueKind ValueKind,
-                              uint32_t MaxNumValueData,
-                              InstrProfValueData ValueData[],
-                              uint32_t &ActualNumValueData, uint64_t &TotalC,
-                              bool GetNoICPValue) {
+MDNode *mayHaveValueProfileOfKind(const Instruction &Inst,
+                                  InstrProfValueKind ValueKind) {
   MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
   if (!MD)
-    return false;
+    return nullptr;
 
-  unsigned NOps = MD->getNumOperands();
+  if (MD->getNumOperands() < 5)
+    return nullptr;
 
-  if (NOps < 5)
-    return false;
-
-  // Operand 0 is a string tag "VP":
   MDString *Tag = cast<MDString>(MD->getOperand(0));
-  if (!Tag)
-    return false;
-
-  if (!Tag->getString().equals("VP"))
-    return false;
+  if (!Tag || !Tag->getString().equals("VP"))
+    return nullptr;
 
   // Now check kind:
   ConstantInt *KindInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
   if (!KindInt)
-    return false;
+    return nullptr;
   if (KindInt->getZExtValue() != ValueKind)
-    return false;
+    return nullptr;
 
+  return MD;
+}
+
+static bool getValueProfDataFromInstImpl(const MDNode *const MD,
+                                         const uint32_t MaxNumDataWant,
+                                         InstrProfValueData ValueData[],
+                                         uint32_t &ActualNumValueData,
+                                         uint64_t &TotalC, bool GetNoICPValue) {
+  const unsigned NOps = MD->getNumOperands();
   // Get total count
   ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
   if (!TotalCInt)
     return false;
   TotalC = TotalCInt->getZExtValue();
-
   ActualNumValueData = 0;
 
   for (unsigned I = 3; I < NOps; I += 2) {
-    if (ActualNumValueData >= MaxNumValueData)
+    if (ActualNumValueData >= MaxNumDataWant)
       break;
     ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
     ConstantInt *Count =
@@ -1262,6 +1341,36 @@ bool getValueProfDataFromInst(const Instruction &Inst,
   return true;
 }
 
+std::unique_ptr<InstrProfValueData[]>
+getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
+                         uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
+                         uint64_t &TotalC, bool GetNoICPValue) {
+  MDNode *ProfMD = mayHaveValueProfileOfKind(Inst, ValueKind);
+  if (ProfMD == nullptr)
+    return nullptr;
+  auto Buffer = std::make_unique<InstrProfValueData[]>(MaxNumValueData);
+  const bool Parsed = getValueProfDataFromInstImpl(
+      ProfMD, MaxNumValueData, Buffer.get(), ActualNumValueData, TotalC,
+      GetNoICPValue);
+  return Parsed ? std::move(Buffer) : nullptr;
+}
+
+// FIXME: Migrate existing callers to the overload above that returns an
+// array.
+bool getValueProfDataFromInst(const Instruction &Inst,
+                              InstrProfValueKind ValueKind,
+                              uint32_t MaxNumValueData,
+                              InstrProfValueData ValueData[],
+                              uint32_t &ActualNumValueData, uint64_t &TotalC,
+                              bool GetNoICPValue) {
+  // Only read the operands when Inst has a value profile of ValueKind.
+  if (MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind))
+    return getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueData,
+                                        ActualNumValueData, TotalC,
+                                        GetNoICPValue);
+  return false;
+}
+
 MDNode *getPGOFuncNameMetadata(const Function &F) {
   return F.getMetadata(getPGOFuncNameMetadataName());
 }
@@ -1278,8 +1387,8 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
   F.setMetadata(getPGOFuncNameMetadataName(), N);
 }
 
-bool needsComdatForCounter(const Function &F, const Module &M) {
-  if (F.hasComdat())
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M) {
+  if (GV.hasComdat())
     return true;
 
   if (!Triple(M.getTargetTriple()).supportsCOMDAT())
@@ -1295,7 +1404,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
   // available_externally functions will end up being duplicated in raw profile
   // data. This can result in distorted profile as the counts of those dups
   // will be accumulated by the profile merger.
-  GlobalValue::LinkageTypes Linkage = F.getLinkage();
+  GlobalValue::LinkageTypes Linkage = GV.getLinkage();
   if (Linkage != GlobalValue::ExternalWeakLinkage &&
       Linkage != GlobalValue::AvailableExternallyLinkage)
     return false;
@@ -1451,7 +1560,7 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
   for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
     if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
       continue;
-    char ProfileKindName[20];
+    char ProfileKindName[20] = {0};
     switch (I) {
     case IPVK_IndirectCallTarget:
       strncpy(ProfileKindName, "IndirectCall", 19);
@@ -1459,6 +1568,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
     case IPVK_MemOPSize:
       strncpy(ProfileKindName, "MemOP", 19);
       break;
+    case IPVK_VTableTarget:
+      strncpy(ProfileKindName, "VTable", 19);
+      break;
     default:
       snprintf(ProfileKindName, 19, "VP[%d]", I);
       break;
@@ -1523,9 +1635,12 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 12ull:
+    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
@@ -1551,10 +1666,13 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 12ull:
+    return offsetOf(&Header::VTableNamesOffset) +
+           sizeof(Header::VTableNamesOffset);
   case 11ull:
     [[fallthrough]];
   case 10ull:
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 0d8d43daae960b..4ef6823381749e 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -366,6 +366,14 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
               return E;
             Value = IndexedInstrProf::ComputeHash(VD.first);
           }
+        } else if (ValueKind == IPVK_VTableTarget) {
+          if (InstrProfSymtab::isExternalSymbol(VD.first)) {
+            Value = 0;
+          } else {
+            if (Error E = Symtab->addVTableName(VD.first))
+              return E;
+            Value = IndexedInstrProf::ComputeHash(VD.first);
+          }
         } else {
           READ_NUM(VD.first, Value);
         }
@@ -533,7 +541,8 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
 
 template <class IntPtrT>
 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
-  if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
+  if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart),
+                              StringRef(VNamesStart, VNamesEnd - VNamesStart)))
     return error(std::move(E));
   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
     const IntPtrT FPtr = swap(I->FunctionPointer);
@@ -541,6 +550,21 @@ Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
       continue;
     Symtab.mapAddress(FPtr, swap(I->NameRef));
   }
+
+  if (VTableBegin != nullptr && VTableEnd != nullptr) {
+    for (const RawInstrProf::VTableProfileData<IntPtrT> *I = VTableBegin;
+         I != VTableEnd; ++I) {
+      const IntPtrT VPtr = swap(I->VTablePointer);
+      if (!VPtr)
+        continue;
+      // Map the whole address range of the vtable to the name hash, since the
+      // instrumented address could be somewhere in the middle.
+      // VPtr is of type uint32_t or uint64_t, so 'VPtr + VTableSize' marks
+      // the end address of the vtable.
+      Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize),
+                              swap(I->VTableNameHash));
+    }
+  }
   return success();
 }
 
@@ -582,10 +606,17 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
   auto NumBitmapBytes = swap(Header.NumBitmapBytes);
   auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes);
   auto NamesSize = swap(Header.NamesSize);
+  auto VTableNameSize = swap(Header.VNamesSize);
+  auto NumVTables = swap(Header.NumVTables);
   ValueKindLast = swap(Header.ValueKindLast);
 
   auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
-  auto PaddingSize = getNumPaddingBytes(NamesSize);
+  auto PaddingBytesAfterNames = getNumPaddingBytes(NamesSize);
+  auto PaddingBytesAfterVTableNames = getNumPaddingBytes(VTableNameSize);
+
+  auto VTableSectionSize =
+      NumVTables * sizeof(RawInstrProf::VTableProfileData<IntPtrT>);
+  auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(VTableSectionSize);
 
   // Profile data starts after profile header and binary ids if exist.
   ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize;
@@ -594,7 +625,12 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
       CountersOffset + CountersSize + PaddingBytesAfterCounters;
   ptrdiff_t NamesOffset =
       BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes;
-  ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
+  ptrdiff_t VTableProfDataOffset =
+      NamesOffset + NamesSize + PaddingBytesAfterNames;
+  ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize +
+                               PaddingBytesAfterVTableProfData;
+  ptrdiff_t ValueDataOffset =
+      VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames;
 
   auto *Start = reinterpret_cast<const char *>(&Header);
   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
@@ -614,8 +650,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
         Start + DataOffset);
     DataEnd = Data + NumData;
+    VTableBegin =
+        reinterpret_cast<const RawInstrProf::VTableProfileData<IntPtrT> *>(
+            Start + VTableProfDataOffset);
+    VTableEnd = VTableBegin + NumVTables;
     NamesStart = Start + NamesOffset;
     NamesEnd = NamesStart + NamesSize;
+    VNamesStart = Start + VTableNameOffset;
+    VNamesEnd = VNamesStart + VTableNameSize;
   }
 
   CountersStart = Start + CountersOffset;
@@ -1260,6 +1302,19 @@ Error IndexedInstrProfReader::readHeader() {
                                         "corrupted binary ids");
   }
 
+  if (GET_VERSION(Header->formatVersion()) >= 12) {
+    uint64_t VTableNamesOffset =
+        endian::byte_swap<uint64_t, llvm::endianness::little>(
+            Header->VTableNamesOffset);
+    const unsigned char *Ptr = Start + VTableNamesOffset;
+
+    CompressedVTableNamesLen =
+        support::endian::readNext<uint64_t, llvm::endianness::little,
+                                  unaligned>(Ptr);
+
+    VTableNamePtr = (const char *)Ptr;
+  }
+
   if (GET_VERSION(Header->formatVersion()) >= 10 &&
       Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
     uint64_t TemporalProfTracesOffset =
@@ -1319,7 +1374,16 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
   if (Symtab)
     return *Symtab;
 
-  std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
+  std::unique_ptr<InstrProfSymtab> NewSymtab =
+      std::make_unique<InstrProfSymtab>();
+
+  if (Error E = NewSymtab->initVTableNamesFromCompressedStrings(
+          StringRef(VTableNamePtr, CompressedVTableNamesLen))) {
+    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
+    consumeError(error(ErrCode, Msg));
+  }
+
+  // finalizeSymtab is called inside populateSymtab.
   if (Error E = Index->populateSymtab(*NewSymtab)) {
     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
     consumeError(error(ErrCode, Msg));
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index d65f8fe50313dc..7592c0ffd3272b 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Error.h"
@@ -455,12 +456,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.MemProfOffset = 0;
   Header.BinaryIdOffset = 0;
   Header.TemporalProfTracesOffset = 0;
+  Header.VTableNamesOffset = 0;
   int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
 
   // Only write out all the fields except 'HashOffset', 'MemProfOffset',
-  // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the
-  // offset of these fields to allow back patching later.
-  for (int I = 0; I < N - 4; I++)
+  // 'BinaryIdOffset', `TemporalProfTracesOffset` and `VTableNamesOffset`. We
+  // need to remember the offset of these fields to allow back patching later.
+  for (int I = 0; I < N - 5; I++)
     OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
 
   // Save the location of Header.HashOffset field in \c OS.
@@ -484,6 +486,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t TemporalProfTracesOffset = OS.tell();
   OS.write(0);
 
+  uint64_t VTableNamesOffset = OS.tell();
+  OS.write(0);
+
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -604,6 +609,43 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       OS.writeByte(0);
   }
 
+  // Emit vtable names if the version carries vtable profile metadata.
+  uint64_t VTableNamesSectionStart = 0;
+  if (IndexedInstrProf::ProfVersion::CurrentVersion >= 12) {
+    VTableNamesSectionStart = OS.tell();
+
+    std::string CompressedVTableNames;
+
+    std::vector<std::string> VTableNameStrs;
+    for (const auto &VTableName : VTableNames.keys()) {
+      VTableNameStrs.push_back(VTableName.str());
+    }
+
+    if (!VTableNameStrs.empty()) {
+      if (Error E = collectGlobalObjectNameStrings(
+              VTableNameStrs, compression::zlib::isAvailable(),
+              CompressedVTableNames))
+        return E;
+    }
+
+    uint64_t CompressedStringLen = CompressedVTableNames.length();
+
+    // Record the length of compressed string.
+    OS.write(CompressedStringLen);
+
+    // Write the chars in compressed strings.
+    for (auto &c : CompressedVTableNames)
+      OS.writeByte(static_cast<uint8_t>(c));
+
+    // Pad up to a multiple of 8. The padding is not counted in
+    // 'CompressedStringLen', which InstrProfReader uses to size the read.
+    uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+
+    for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) {
+      OS.writeByte(0);
+    }
+  }
+
   uint64_t TemporalProfTracesSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
     TemporalProfTracesSectionStart = OS.tell();
@@ -647,6 +689,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
       // traces).
       {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+      {VTableNamesOffset, &VTableNamesSectionStart, 1},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
        (int)(SummarySize / sizeof(uint64_t))},
@@ -699,7 +742,8 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
       std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
       DenseSet<uint64_t> SeenValues;
       for (uint32_t I = 0; I < ND; I++)
-        if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second)
+        if ((VK != IPVK_IndirectCallTarget && VK != IPVK_VTableTarget) &&
+            !SeenValues.insert(VD[I].Value).second)
           return make_error<InstrProfError>(instrprof_error::invalid_prof);
     }
   }
@@ -747,7 +791,7 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
       OS << ND << "\n";
       std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
       for (uint32_t I = 0; I < ND; I++) {
-        if (VK == IPVK_IndirectCallTarget)
+        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
           OS << Symtab.getFuncOrVarNameIfDefined(VD[I].Value) << ":"
              << VD[I].Count << "\n";
         else
@@ -786,6 +830,11 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
     }
   }
 
+  for (const auto &VTableName : VTableNames) {
+    if (Error E = Symtab.addVTableName(VTableName.getKey()))
+      return E;
+  }
+
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
     writeTextTemporalProfTraceData(OS, Symtab);
 
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 7344fea1751719..6a44a32bb34dc9 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -136,11 +136,13 @@ class IndirectCallPromoter {
       const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
       uint64_t TotalCount, uint32_t NumCandidates);
 
-  // Promote a list of targets for one indirect-call callsite. Return
-  // the number of promotions.
-  uint32_t tryToPromote(CallBase &CB,
-                        const std::vector<PromotionCandidate> &Candidates,
-                        uint64_t &TotalCount);
+  // Promote a list of targets for one indirect-call callsite by comparing
+  // the indirect callee against candidate functions. Returns true if the IR
+  // is transformed and false otherwise.
+  bool tryToPromoteWithFuncCmp(
+      CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+      uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+      uint32_t NumCandidates);
 
 public:
   IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -273,9 +275,10 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
 }
 
 // Promote indirect-call to conditional direct-call for one callsite.
-uint32_t IndirectCallPromoter::tryToPromote(
+bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
     CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
-    uint64_t &TotalCount) {
+    uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+    uint32_t NumCandidates) {
   uint32_t NumPromoted = 0;
 
   for (const auto &C : Candidates) {
@@ -287,7 +290,18 @@ uint32_t IndirectCallPromoter::tryToPromote(
     NumOfPGOICallPromotion++;
     NumPromoted++;
   }
-  return NumPromoted;
+
+  const bool Changed = (NumPromoted != 0);
+
+  if (Changed) {
+    CB.setMetadata(LLVMContext::MD_prof, nullptr);
+
+    if (TotalCount != 0)
+      annotateValueSite(*F.getParent(), CB, ICallProfDataRef.slice(NumPromoted),
+                        TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+  }
+
+  return Changed;
 }
 
 // Traverse all the indirect-call callsite and get the value profile
@@ -305,19 +319,8 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
       continue;
     auto PromotionCandidates = getPromotionCandidatesForCallSite(
         *CB, ICallProfDataRef, TotalCount, NumCandidates);
-    uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
-    if (NumPromoted == 0)
-      continue;
-
-    Changed = true;
-    // Adjust the MD.prof metadata. First delete the old one.
-    CB->setMetadata(LLVMContext::MD_prof, nullptr);
-    // If all promoted, we don't need the MD.prof metadata.
-    if (TotalCount == 0 || NumPromoted == NumVals)
-      continue;
-    // Otherwise we need update with the un-promoted records back.
-    annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted),
-                      TotalCount, IPVK_IndirectCallTarget, NumCandidates);
+    Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
+                                       ICallProfDataRef, NumCandidates);
   }
   return Changed;
 }
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index a19b1408725441..49978dac034e82 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -64,6 +64,9 @@ using namespace llvm;
 #define DEBUG_TYPE "instrprof"
 
 namespace llvm {
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
 // TODO: Remove -debug-info-correlate in next LLVM release, in favor of
 // -profile-correlate=debug-info.
 cl::opt<bool> DebugInfoCorrelate(
@@ -196,12 +199,18 @@ class InstrLowerer final {
     PerFunctionProfileData() = default;
   };
   DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+  // Key is virtual table variable, value is 'VTableProfData' in the form of
+  // GlobalVariable.
+  DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
   /// If runtime relocation is enabled, this maps functions to the load
   /// instruction that produces the profile relocation bias.
   DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
   std::vector<GlobalValue *> CompilerUsedVars;
   std::vector<GlobalValue *> UsedVars;
   std::vector<GlobalVariable *> ReferencedNames;
+  // The list of virtual table variables for which the VTableProfData is
+  // collected.
+  std::vector<GlobalVariable *> ReferencedVTables;
   GlobalVariable *NamesVar = nullptr;
   size_t NamesSize = 0;
 
@@ -294,9 +303,15 @@ class InstrLowerer final {
   /// Create INSTR_PROF_DATA variable for counters and bitmaps.
   void createDataVariable(InstrProfCntrInstBase *Inc);
 
+  /// Create the profile data variable for vtable \p GV if not yet created.
+  void getOrCreateVTableProfData(GlobalVariable *GV);
+
   /// Emit the section with compressed function names.
   void emitNameData();
 
+  /// Emit the section with compressed vtable names.
+  void emitVTableNames();
+
   /// Emit value nodes section for value profiling.
   void emitVNodes();
 
@@ -740,6 +755,15 @@ bool InstrLowerer::lower() {
     }
   }
 
+  if (EnableVTableValueProfiling) {
+    for (GlobalVariable &GV : M.globals()) {
+      // Global variables with type metadata are virtual table variables.
+      if (GV.hasMetadata(LLVMContext::MD_type)) {
+        getOrCreateVTableProfData(&GV);
+      }
+    }
+  }
+
   for (Function &F : M)
     MadeChange |= lowerIntrinsics(&F);
 
@@ -753,6 +777,7 @@ bool InstrLowerer::lower() {
 
   emitVNodes();
   emitNameData();
+  emitVTableNames();
 
   // Emit runtime hook for the cases where the target does not unconditionally
   // require pulling in profile runtime, and coverage is enabled on code that is
@@ -1220,6 +1245,129 @@ void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn,
     GV->setLinkage(GlobalValue::InternalLinkage);
 }
 
+static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
+  if (!profDataReferencedByCode(*GV->getParent()))
+    return false;
+
+  if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
+      !GV->hasAvailableExternallyLinkage())
+    return true;
+
+  // This avoids the profile data from referencing internal symbols in
+  // COMDAT.
+  if (GV->hasLocalLinkage() && GV->hasComdat())
+    return false;
+
+  return true;
+}
+
+// FIXME: Does symbolic relocation from 'getFuncAddrForProfData' matter here?
+static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
+  auto *Int8PtrTy = PointerType::getUnqual(GV->getContext());
+
+  // Store a nullptr in __profvt_ if a real address shouldn't be used.
+  if (!shouldRecordVTableAddr(GV))
+    return ConstantPointerNull::get(Int8PtrTy);
+
+  return ConstantExpr::getBitCast(GV, Int8PtrTy);
+}
+
+void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
+  assert(!DebugInfoCorrelate &&
+         "Value profiling is not supported with lightweight instrumentation");
+  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+    return;
+
+  if (GV->getName().starts_with("llvm.") ||
+      GV->getName().starts_with("__llvm") ||
+      GV->getName().starts_with("__prof"))
+    return;
+
+  // VTableProfData already created
+  auto It = VTableDataMap.find(GV);
+  if (It != VTableDataMap.end() && It->second)
+    return;
+
+  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+  GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
+
+  // This is to keep consistent with per-function profile data
+  // for correctness.
+  if (TT.isOSBinFormatXCOFF()) {
+    Linkage = GlobalValue::InternalLinkage;
+    Visibility = GlobalValue::DefaultVisibility;
+  }
+
+  LLVMContext &Ctx = M.getContext();
+  Type *DataTypes[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+
+  auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
+
+  // Used by INSTR_PROF_VTABLE_DATA MACRO
+  Constant *VTableAddr = getVTableAddrForProfData(GV);
+  const std::string PGOVTableName = getPGOName(*GV);
+  // Record the length of the vtable. This is needed since vtable pointers
+  // loaded from C++ objects might be from the middle of a vtable definition.
+  uint32_t VTableSizeVal =
+      M.getDataLayout().getTypeAllocSize(GV->getValueType());
+
+  Constant *DataVals[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+
+  std::string VarName = getInstrProfVTableVarPrefix().str() + PGOVTableName;
+  auto *Data =
+      new GlobalVariable(M, DataTy, false /* constant */, Linkage,
+                         ConstantStruct::get(DataTy, DataVals), VarName);
+
+  Data->setVisibility(Visibility);
+  Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
+  Data->setAlignment(Align(8));
+
+  const bool NeedComdat = needsComdatForCounter(*GV, M);
+
+  // The lambda's GV parameter is the profile data variable, not the vtable.
+  // Place the global variable for per-vtable profile data in a comdat group
+  // if the associated vtable definition is a COMDAT. This makes sure only one
+  // copy of the variable for the vtable will be emitted after linking.
+  auto MaybeSetComdat = [&](GlobalVariable *GV, StringRef GroupName) {
+    bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
+    if (UseComdat) {
+      // Create a new comdat group using the name of the global variable as
+      // opposed to using the comdat group of the vtable.
+      Comdat *C = M.getOrInsertComdat(GroupName);
+      // For ELF, when not using COMDAT, put the vtable profile data into a
+      // nodeduplicate COMDAT which is lowered to a zero-flag section group.
+      // This allows -z start-stop-gc to discard the entire group when the
+      // vtable def is discarded.
+      if (!NeedComdat)
+        C->setSelectionKind(Comdat::NoDeduplicate);
+      GV->setComdat(C);
+      // COFF doesn't allow the comdat group leader to have private linkage, so
+      // upgrade private linkage to internal linkage to produce a symbol table
+      // entry.
+      if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) {
+        GV->setLinkage(GlobalValue::InternalLinkage);
+      }
+      return;
+    }
+  };
+
+  MaybeSetComdat(Data, Data->getName());
+
+  VTableDataMap[GV] = Data;
+
+  ReferencedVTables.push_back(GV);
+
+  // VTable <Hash, Addr> is used by runtime but not referenced by other
+  // sections. Conservatively mark it linker retained.
+  UsedVars.push_back(Data);
+}
+
 GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
                                                   InstrProfSectKind IPSK) {
   GlobalVariable *NamePtr = Inc->getName();
@@ -1633,6 +1781,31 @@ void InstrLowerer::emitNameData() {
     NamePtr->eraseFromParent();
 }
 
+void InstrLowerer::emitVTableNames() {
+  if (!EnableVTableValueProfiling || ReferencedVTables.empty())
+    return;
+
+  // Collect the PGO names of referenced vtables and compress them.
+  std::string CompressedVTableNames;
+  if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
+                                     DoInstrProfNameCompression)) {
+    report_fatal_error(Twine(toString(std::move(E))), false);
+  }
+
+  auto &Ctx = M.getContext();
+  auto *VTableNamesVal = ConstantDataArray::getString(
+      Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
+  GlobalVariable *VTableNamesVar =
+      new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
+                         GlobalValue::PrivateLinkage, VTableNamesVal,
+                         getInstrProfVTableNamesVarName());
+  VTableNamesVar->setSection(
+      getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
+  VTableNamesVar->setAlignment(Align(1));
+  // Make VTableNames linker retained.
+  UsedVars.push_back(VTableNamesVar);
+}
+
 void InstrLowerer::emitRegistration() {
   if (!needsRuntimeRegistrationOfSectionRange(TT))
     return;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c20fc942eaf0d5..f1aa17de429338 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -327,6 +327,11 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
 extern cl::opt<std::string> ViewBlockFreqFuncName;
 
+extern cl::opt<bool> DebugInfoCorrelate;
+
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
 } // namespace llvm
 
@@ -581,6 +586,8 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
       NumOfPGOBB += MST.bbInfoSize();
       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
+      if (EnableVTableValueProfiling)
+        ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
     } else {
       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 3a129de1acd02d..96b21301ce676f 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -90,9 +90,39 @@ public:
   }
 };
 
+///------------------------ VTableProfilingPlugin ------------------------------
+/// Collects IPVK_VTableTarget candidates from findVTableAddrs(F).
+class VTableProfilingPlugin {
+  Function &F;
+
+public:
+  static constexpr InstrProfValueKind Kind = IPVK_VTableTarget;
+
+  VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {}
+
+  void run(std::vector<CandidateInfo> &Candidates) {
+    std::vector<Instruction *> Result = findVTableAddrs(F);
+    for (Instruction *I : Result) {
+      Instruction *InsertPt = I->getNextNonDebugInstruction();
+      // When finding an insertion point, keep PHI and EH pad instructions
+      // before vp intrinsics. This is similar to
+      // `BasicBlock::getFirstInsertionPt`.
+      while (InsertPt && (dyn_cast<PHINode>(InsertPt) || InsertPt->isEHPad()))
+        InsertPt = InsertPt->getNextNonDebugInstruction();
+      // Skip instrumenting the value if no insertion point was found.
+      // FIXME: Set InsertPt to the end of basic block to instrument the value
+      // when no insertion point is found.
+      if (InsertPt == nullptr)
+        continue;
+
+      Instruction *AnnotatedInst = I;
+      Candidates.emplace_back(CandidateInfo{I, InsertPt, AnnotatedInst});
+    }
+  }
+};
+
 ///----------------------- Registration of the plugins -------------------------
 /// For now, registering a plugin with the ValueProfileCollector is done by
 /// adding the plugin type to the VP_PLUGIN_LIST macro.
-#define VP_PLUGIN_LIST           \
-    MemIntrinsicPlugin,          \
-    IndirectCallPromotionPlugin
+#define VP_PLUGIN_LIST                                                         \
+  MemIntrinsicPlugin, IndirectCallPromotionPlugin, VTableProfilingPlugin
diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
new file mode 100644
index 00000000000000..ba3ce9a75ee832
--- /dev/null
+++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
@@ -0,0 +1,74 @@
+; Promote at most one function and annotate at most one vtable.
+; As a result, only one value (of each relevant kind) shows up in the function
+; summary.
+
+; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o
+
+; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+
+; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS
+; Round trip it through llvm-as
+; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS
+
+; CHECK: <GLOBALVAL_SUMMARY_BLOCK
+; CHECK-NEXT:   <VERSION op0=9/>
+; CHECK-NEXT:   <FLAGS op0=0/>
+; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
+; that loads vtable pointers.
+; CHECK-NEXT: <VALUE_GUID op0=21 op1=1960855528937986108/>
+; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
+; indirect call instruction.
+; CHECK-NEXT:      <VALUE_GUID op0=20 op1=5459407273543877811/>
+; NOTE vtables and functions from Derived class are dropped because
+; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
+; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
+;                                     numrefs, rorefcnt, worefcnt,
+;                                     m x valueid,
+;                                     n x (valueid, hotness+tailcall)]
+; CHECK-NEXT:   <PERMODULE_PROFILE abbrevid=4 op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=21 op8=20 op9=3/>
+; CHECK-NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function has one BB and an entry count of 150, so the BB is hot according to
+; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
+define i32 @_Z4testP4Base(ptr %0) !prof !15 {
+  %2 = load ptr, ptr %0, !prof !16
+  %3 = load ptr, ptr %2
+  %4 = tail call i32 %3(ptr %0), !prof !17
+  ret i32 %4
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 200}
+!6 = !{!"MaxInternalCount", i64 200}
+!7 = !{!"MaxFunctionCount", i64 200}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 990000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+
+!15 = !{!"function_entry_count", i32 150}
+; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
+; 13870436605473471591 is the MD5 hash of _ZTV7Derived
+!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50}
+; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
+; 6174874150489409711 is the MD5 hash of  _ZN7Derived4funcEv
+!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50}
+
+; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
+; global value summaries are printed out in the order that gv's guid increases.
+; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0))
+; DIS: ^1 = gv: (guid: 1960855528937986108)
+; DIS: ^2 = gv: (guid: 5459407273543877811)
+; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965
+; DIS: ^4 = blockcount: 0
diff --git a/llvm/test/Instrumentation/InstrProfiling/coverage.ll b/llvm/test/Instrumentation/InstrProfiling/coverage.ll
index bbf895ea4b34e1..08cbcaa962b765 100644
--- a/llvm/test/Instrumentation/InstrProfiling/coverage.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/coverage.ll
@@ -5,12 +5,12 @@ target triple = "aarch64-unknown-linux-gnu"
 
 @__profn_foo = private constant [3 x i8] c"foo"
 ; CHECK: @__profc_foo = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
-; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64)
-; BINARY: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_foo to i64),
+; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64)
+; BINARY: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_foo to i64),
 @__profn_bar = private constant [3 x i8] c"bar"
 ; CHECK: @__profc_bar = private global [1 x i8] c"\FF", section "__llvm_prf_cnts", comdat, align 1
-; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64)
-; BINARY: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_bar to i64),
+; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64)
+; BINARY: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 ptrtoint (ptr @__profc_bar to i64),
 
 ; CHECK: @__llvm_prf_nm = {{.*}} section "__llvm_prf_names"
 ; BINARY: @__llvm_prf_nm ={{.*}} section "__llvm_covnames"
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw
index 5efda10bb98a941c04b6846db05d3691bc36aac0..5d96ba8ac220508002ae9a7cdb0beb13e0a25144 100644
GIT binary patch
delta 133
zcmbQhvVeuNu_!ISs37M**F;W##g0c6JDpbj|Gzn}&24We0|sE4n5oVhFbgO-ajG?I
s0+?~tnzsPN04lGLYj at i_S(ee5^#Dj at awy|$1+XHZ93#{)ux=zi0I(w{I{*Lx

delta 117
zcmZ3$GJ%D&u_!ISs37M*=R{6_L67IVA1SZ;|9^9yv+SKv1_s87mFlblGl86mORZTI
rz>KHXyapf!P<n@?i|n1rx{SuG4Iq)psf at D~z=}Xx86W_x8;K796T>9f

diff --git a/llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw b/llvm/test/Transforms/PGOProfile/Inputs/vtable_prof.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..5adeb774cddd6462bd2d3779a96d9ad5a06d5e23
GIT binary patch
literal 656
zcmZoHO3N=Q$obF700xW at ih+R*#(>fsXncDp|G<9;NPf(`(<%&25s=FS6^fqKW9??N
zGj7VQAPc)yoJ=r%1$-<h`e5o|CjGkF``j#L>TQo!%iMorv-oE~?b`s=moX8dAEXai
zZ{I{cNtM}66M)L!U*_`VDuC*1;77F&?qR5f3mFb--sHb`6Q<7rs&4`TeGkj4d7Hnn
z_QUiEK=mykpl|Z0nI4u#dwXE|9{hu7+(1CzJE&)WLEZg=8Nz`12PO`qVd`OYj%~%Q
z?tUW^4?Qnm9Z%ksIv!_J&iI}=libt)X>)<6?kOE_UqcgL?X%uyC1=kZ5ixZVHa)cJ
q8pk>@hHDd5b}&Q$t%KPG4tuD3VBrh17v_JMy|8eE$;12!lLr8f`l*Eg

literal 0
HcmV?d00001

diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
index 8c6942c0f527bc..1bad0db1b47624 100644
--- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll
+++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
@@ -13,9 +13,9 @@ $foo = comdat any
 ; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
 ; CHECK-NOT: __profn__stdin__foo
 ; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
-; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [2 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null
+; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), i64 0, ptr null
 ; CHECK-NOT: @foo
-; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
+; CHECK-SAME: , ptr null, i32 1, [3 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
 ; CHECK: @__llvm_prf_nm
 ; CHECK: @llvm.compiler.used
 
diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
new file mode 100644
index 00000000000000..edc866e4e4efb5
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S | FileCheck %s --check-prefix=LOWER
+
+; __llvm_prf_vnm stores zlib-compressed vtable names.
+; REQUIRES: zlib
+
+source_filename = "vtable_local.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The test IR is generated based on the following C++ program.
+; Base1 has external linkage and Base2 has local linkage.
+; class Derived uses multiple inheritance so its virtual table
+; global variable contains two vtables. func1 is loaded from
+; the vtable compatible with class Base1, and func2 is loaded
+; from the vtable compatible with class Base2.
+
+; class Base1 {
+; public:
+;   virtual int func1(int a) ;
+; };
+;
+; namespace {
+; class Base2 {
+; public:
+;   __attribute__((noinline)) virtual int func2(int a) {
+;     return a;
+;   }
+; };
+; }
+
+; class Derived : public Base1, public Base2 {
+; public:
+;   Derived(int c) : v(c) {}
+; private:
+;   int v;
+; };
+;
+; Derived* createType();
+
+; int func(int a) {
+;   Derived* d = createType();
+;   return d->func2(a) + d->func1(a);
+; }
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !0, !type !3, !type !6, !type !8, !type !10
+ at _ZTV5Base1 = available_externally constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei] }, !type !0
+ at _ZTVN12_GLOBAL__N_15Base2E = internal constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !11, !type !8; !vcall_visibility !12
+ at llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata"
+
+; GEN: __llvm_profile_raw_version = comdat any
+; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat
+; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci"
+
+; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate
+; LOWER: $"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate
+; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 1419990121885302679, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @__llvm_prf_vnm = private constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8E\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19:\12o", section "__llvm_prf_vtabnames", align 1
+; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata"
+
+define i32 @_Z4funci(i32 %a) {
+entry:
+  %call = call ptr @_Z10createTypev()
+  %add.ptr = getelementptr inbounds i8, ptr %call, i64 8
+  %vtable = load ptr, ptr %add.ptr
+; GEN: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64
+; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash:[0-9]+]], i64 [[P1]], i32 2, i32 0)
+; LOWER: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64
+; LOWER: call void @__llvm_profile_instrument_target(i64 [[P1]], ptr @__profd__Z4funci, i32 2)
+  %vfunc1 = load ptr, ptr %vtable
+  %call1 = call i32 %vfunc1(ptr %add.ptr, i32 %a)
+  %vtable2 = load ptr, ptr %call
+; GEN: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64
+; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash]], i64 [[P2]], i32 2, i32 1)
+; LOWER: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64
+; LOWER: call void @__llvm_profile_instrument_target(i64 [[P2]], ptr @__profd__Z4funci, i32 3)
+  %vfunc2 = load ptr, ptr %vtable2
+  %call4 = call i32 %vfunc2(ptr %call, i32 %a)
+  %add = add nsw i32 %call1, %call4
+  ret i32 %add
+}
+
+declare ptr @_Z10createTypev()
+declare i32 @_ZN12_GLOBAL__N_15Base25func2Ei(ptr %this, i32 %a)
+declare i32 @_ZN5Base15func1Ei(ptr, i32)
+
+!0 = !{i64 16, !"_ZTS5Base1"}
+!3 = !{i64 16, !"_ZTS7Derived"}
+!6 = !{i64 40, !7}
+!7 = distinct !{}
+!8 = !{i64 16, !9}
+!9 = distinct !{}
+!10 = !{i64 40, !9}
+!11 = !{i64 16, !7}
diff --git a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw
index 9cd225587c92511e99f3497ce1d5f47c6fc5f0af..a5dcc9fb22e2e125eccd0ad52a509a84e218781a 100644
GIT binary patch
delta 40
ycmV+ at 0N4NE5AY8OfpTVVa&T<_3Xus<4&W)m$E2$N|IVI0I9pYdP6HaTaBv5DToMxi

delta 39
vcmeys|A3#fu_!ISs37M*=R{6_K?|$bHJ=*(|L<GyrHQwmfq`*jWjQ+lUJ(&8

diff --git a/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw b/llvm/test/tools/llvm-profdata/Inputs/compressed.profraw
index 9966729d92ddc33bf89eeb3fee87215bbabbbef1..4d36ffcf5e05b084cf0d1e04fe3933f80b0b1749 100644
GIT binary patch
delta 40
ycmV+ at 0N4Mp55NxzfpTVVa&T<_3Xus<4&eFQuj8rz|DDYvP#jj1P6HaTa6kus4H8fQ

delta 39
vcmX at Wzk#2#u_!ISs37M*=R{6_L5r?)Gq*SV|KArNnC4N>z`(e%(w!XuMI#TR

diff --git a/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh b/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
new file mode 100755
index 00000000000000..89c3e642ac7ef7
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/update_vtable_value_prof_inputs.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+if [ $# -lt 1 ]; then
+  echo "Path to clang++ required!"
+  echo "Usage: update_vtable_value_prof_inputs.sh /path/to/updated/clang++"
+  exit 1
+else
+  CLANG=$1
+fi
+
+
+# Remember current directory.
+CURDIR=$PWD
+
+# Allows the script to be invoked from other directories.
+OUTDIR=$(dirname $(realpath -s $0))
+echo $OUTDIR
+
+cd $OUTDIR
+
+# vtable_prof.cc has the following class hierarchy:
+# class Base
+# ├── class Derived1
+# └── class Derived2
+# Derived1 is a class in the global namespace and Derived2 is in an anonymous
+# namespace for test coverage. Overridden virtual methods are annotated as
+# `noinline` so the callsites remain indirect calls for testing purposes.
+cat > vtable_prof.cc << EOF
+#include <cstdlib>
+#include <cstdio>
+
+class Base {
+ public:
+  virtual int func1(int a, int b) = 0;
+  virtual int func2(int a, int b) = 0;
+};
+
+class Derived1 : public Base {
+    public:
+    __attribute__((noinline))
+    int func1(int a, int b) override
+    {
+        return a + b;
+    }
+
+    __attribute__((noinline))
+    int func2(int a, int b) override {
+        return a * b;
+    }
+};
+
+namespace {
+class Derived2 : public Base {
+    public:
+    __attribute__((noinline))
+    int func1(int a, int b) override {
+        return a - b;
+    }
+
+    __attribute__((noinline))
+    int func2(int a, int b) override {
+        return a * (a - b);
+    }
+};
+}  // namespace
+
+__attribute__((noinline)) Base* createType(int a) {
+    Base* base = nullptr;
+    if (a % 4 == 0)
+      base = new Derived1();
+    else
+      base = new Derived2();
+    return base;
+}
+
+
+int main(int argc, char** argv) {
+    int sum = 0;
+    for (int i = 0; i < 1000; i++) {
+        int a = rand();
+        int b = rand();
+        Base* ptr = createType(i);
+        sum += ptr->func1(a, b) + ptr->func2(b, a);
+    }
+    printf("sum is %d\n", sum);
+    return 0;
+}
+EOF
+
+
+# Clean up temporary files on exit and return to original directory.
+cleanup() {
+  rm -f vtable_prof
+  rm -f vtable_prof.cc
+  cd $CURDIR
+}
+trap cleanup EXIT
+
+FLAGS="-fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling"
+
+${CLANG} ${FLAGS} vtable_prof.cc -o vtable_prof
+env LLVM_PROFILE_FILE=vtable-value-prof-basic.profraw ./vtable_prof
diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof-basic.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..322c8dcd73f935564ca6775962dfa7cbfbdbeda6
GIT binary patch
literal 960
zcmZoHO3N=Q$obF300xW at ih+R*#(>fsXnb^T5>(*8e+cNvxiv$7)&KuHuBfD62xVZf
zg~~Ib(b2Pdtlf-u#!a~uWMQ|8lL at 9DX8r>{Rj_^-AEqD1|8=$Zxmn88+a9f!x&Opw
z at lSx-50h8mhv;8$5<-LY!!Srq-$Xr0mDx=oZSOC0`En(o>30x7wI3cpPzx3^9MrtY
zfA1ztzXqEA00R1p<k**G8rrkL^ndsVF#u{jg8~8l5A7N^IQjV|!u0Pz({DgP|2t at a
z{(@Tgf*Hbq`4c7%qhab{^fJcyid)_NMkXG5UcNe>yeoA)&ZeC4J#!|xr~lLD0#Dsj
zI^Mp9CcfHdz0FF_o;f07%G5b+s*2jvCC9`?o-Ti>svgZODk*hz_wOIpmcNc1|9w>Y
z&=v_r9wlGy6JiY2N)|B?_Z(ljx&GM=lcpW at koZu5y4Nb%QFreJNezYuG;wQ?v$#0n
zPDs(w_VhXB*{iQ{%Kx&r-?lz>uU}~PUXj|##Nd5hxD9Cb3Lu81HE?=?a$xBJ79Ozp
jfrSSwzG3pP at Q2BR%>tPXHSYw}KQQ$IP=CPG6Oso21zGjH

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
new file mode 100644
index 00000000000000..ec85dc4c3b12f0
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext
@@ -0,0 +1,73 @@
+# IR level Instrumentation Flag
+:ir
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+_Z10createTypei
+# Func Hash:
+146835647075900052
+# Num Counters:
+2
+# Counter Values:
+750
+250
+
+_ZN8Derived15func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+_ZN8Derived15func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+main
+# Func Hash:
+1124236338992350536
+# Num Counters:
+2
+# Counter Values:
+1000
+1
+# Num Value Kinds:
+2
+# ValueKind = IPVK_IndirectCallTarget:
+0
+# NumValueSites:
+2
+2
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750
+_ZN8Derived15func1Eii:250
+2
+/path/to/vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750
+_ZN8Derived15func2Eii:250
+# ValueKind = IPVK_VTableTarget:
+2
+# NumValueSites:
+2
+2
+/path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+_ZTV8Derived1:250
+2
+/path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+_ZTV8Derived1:250
diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index eda63203a304a4..61881b69cfd5c0 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -10,10 +10,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 // There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes.
 //   2 * 8  binary ID sizes
 // + 2 * 20 binary IDs (of size 20)
@@ -32,6 +34,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Binary IDs - There are only two in this case that are 20 bytes.
 RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
index 38b838e0d100af..316a9a4c9df4ce 100644
--- a/llvm/test/tools/llvm-profdata/large-binary-id-size.test
+++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Check for a corrupted size being too large past the end of the file.
 RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index c967e850dbe352..8b686d5c50cb74 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -10,10 +10,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index 2e747f81a6bfae..089afad4206223 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -10,10 +10,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
index 3c23bc7dd0f7f9..e404ba4210cc14 100644
--- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
+++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
@@ -10,10 +10,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, BitmaskDelta, (uintptr_t)BitmaskBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -26,6 +28,8 @@ RUN: printf '\0\0\6\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\6\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
diff --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
index 4a5c42843ff4dd..ee54bfb9785678 100644
--- a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
+++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t.profraw
 // We should fail on this because the binary IDs is not a multiple of 8 bytes.
 RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -10,6 +10,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Binary IDs - There are only two in this case that are 20 bytes.
 RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
index 2a92575ee34075..dfa163f1f3439a 100644
--- a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
+++ b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
@@ -15,6 +15,8 @@ RUN: printf '\0\0\0\0\0\0\0\20' >> %t
 RUN: printf '\0\0\0\1\0\4\0\0' >> %t
 RUN: printf '\0\0\0\2\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: not llvm-profdata show %t -o /dev/null 2>&1 | FileCheck %s
 
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
index 8220361df6cfa6..63782c8b94d4a5 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
@@ -1,5 +1,6 @@
+// Header
 RUN: printf '\377lprofR\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\11' >> %t
+RUN: printf '\0\0\0\0\0\0\0\12' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\2' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +13,8 @@ RUN: printf '\0\0\0\0\1\0\0\0' >> %t
 RUN: printf '\0\0\0\0\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0\2\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\134\370\302\114\333\030\275\254' >> %t
 RUN: printf '\0\0\0\0\0\0\0\1' >> %t
@@ -20,9 +23,8 @@ RUN: printf '\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\3' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\344\023\165\112\031\035\265\067' >> %t
 RUN: printf '\0\0\0\0\0\0\0\2' >> %t
@@ -31,9 +33,8 @@ RUN: printf '\2\xff\xff\xd3' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\2' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\0\0\0\0\0\0\0\023' >> %t
 RUN: printf '\0\0\0\0\0\0\0\067' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
index 9352ae132380d6..e9569bec1178bd 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
@@ -1,5 +1,5 @@
 RUN: printf '\201Rforpl\377' > %t
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\2\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\1\0\0\0\0' >> %t
 RUN: printf '\0\0\0\3\0\0\0\0' >> %t
 RUN: printf '\0\0\0\2\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\0\3' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\3\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\xd3\xff\xff\2' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0' >> %t
 RUN: printf '\2\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t
 RUN: printf '\067\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
index c3e995add6ff2e..0bc579eec58abb 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
@@ -1,5 +1,5 @@
 RUN: printf '\377lprofr\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\11' >> %t
+RUN: printf '\0\0\0\0\0\0\0\12' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\2' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\1\0\4\0\0' >> %t
 RUN: printf '\0\0\0\3\0\4\0\0' >> %t
 RUN: printf '\0\0\0\2\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\134\370\302\114\333\030\275\254' >> %t
 RUN: printf '\0\0\0\0\0\0\0\1' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\0\3\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\3' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\344\023\165\112\031\035\265\067' >> %t
 RUN: printf '\0\0\0\0\0\0\0\02' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\0\0\0\3\0\3\xff\xc3' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\02' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\0\0\0\0\0\0\0\023' >> %t
 RUN: printf '\0\0\0\0\0\0\0\067' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
index 0b3ef2a89abe52..ca9ea54c3f0146 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\2\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -12,6 +12,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t
 RUN: printf '\0\0\4\0\3\0\0\0' >> %t
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t
@@ -20,9 +22,8 @@ RUN: printf '\0\0\4\0\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\3\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t
@@ -31,9 +32,8 @@ RUN: printf '\xc3\xff\3\0\3\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\02\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\1\0\0\0' >> %t
-RUN: printf '\0\0\0\0' >> %t
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t
 RUN: printf '\067\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-two-profiles.test b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
index f4a9aa8e1bbc3a..70a4210dea9f84 100644
--- a/llvm/test/tools/llvm-profdata/raw-two-profiles.test
+++ b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t-foo.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -12,6 +12,8 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t-foo.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -26,7 +28,7 @@ RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw
 
 RUN: printf '\201rforpl\377' > %t-bar.profraw
-RUN: printf '\11\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\12\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
@@ -39,6 +41,8 @@ RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t-bar.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
new file mode 100644
index 00000000000000..fb070dc97a4d8a
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
@@ -0,0 +1,124 @@
+To update the inputs used below, run
+Inputs/update_vtable_value_prof_inputs.sh /path/to/updated/clang++
+
+; Raw profiles store zlib-compressed vtable names, so the raw profile reader
+; needs zlib support to decompress them.
+; REQUIRES: zlib
+
+; RUN: rm -rf %t && mkdir %t && cd %t
+
+Show profile data from raw profiles.
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables %p/Inputs/vtable-value-prof-basic.profraw | FileCheck %s --check-prefix=RAW
+
+Generate indexed profile from raw profile and show the data.
+RUN: llvm-profdata merge %p/Inputs/vtable-value-prof-basic.profraw -o indexed.profdata
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED
+
+Generate text profile from raw profile and show the data.
+RUN: llvm-profdata merge --text %p/Inputs/vtable-value-prof-basic.profraw -o vtable-value-prof-basic.proftext
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text vtable-value-prof-basic.proftext | FileCheck %s --check-prefix=ICTEXT 
+
+RAW: Counters:
+RAW-NEXT:  main:
+RAW-NEXT:  Hash: 0x0f9a16fe6d398548
+RAW-NEXT:  Counters: 2
+RAW-NEXT:  Indirect Call Site Count: 2
+RAW-NEXT:  Number of instrumented vtables: 2
+RAW-NEXT:  Indirect Target Results:
+RAW-NEXT:       [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
+RAW-NEXT:       [  0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii,        750 ] (75.00%)
+RAW-NEXT:       [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
+RAW-NEXT:       [  1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii,        750 ] (75.00%)
+RAW-NEXT:  VTable Results:
+RAW-NEXT:       [  0, _ZTV8Derived1,        250 ] (25.00%)
+RAW-NEXT:       [  0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+RAW-NEXT:       [  1, _ZTV8Derived1,        250 ] (25.00%)
+RAW-NEXT:       [  1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+RAW-NEXT: Instrumentation level: IR  entry_first = 0
+RAW-NEXT: Functions shown: 1
+RAW-NEXT: Total functions: 6
+RAW-NEXT: Maximum function count: 1000
+RAW-NEXT: Maximum internal block count: 250
+RAW-NEXT: Statistics for indirect call sites profile:
+RAW-NEXT:   Total number of sites: 2
+RAW-NEXT:   Total number of sites with values: 2
+RAW-NEXT:   Total number of profiled values: 4
+RAW-NEXT:   Value sites histogram:
+RAW-NEXT:         NumTargets, SiteCount
+RAW-NEXT:         2, 2
+RAW-NEXT: Statistics for vtable profile:
+RAW-NEXT:   Total number of sites: 2
+RAW-NEXT:   Total number of sites with values: 2
+RAW-NEXT:   Total number of profiled values: 4
+RAW-NEXT:   Value sites histogram:
+RAW-NEXT:         NumTargets, SiteCount
+RAW-NEXT:         2, 2
+
+
+INDEXED:      Counters:
+INDEXED-NEXT:   main:
+INDEXED-NEXT:     Hash: 0x0f9a16fe6d398548
+INDEXED-NEXT:     Counters: 2
+INDEXED-NEXT:     Indirect Call Site Count: 2
+INDEXED-NEXT:     Number of instrumented vtables: 2
+INDEXED-NEXT:     Indirect Target Results:
+INDEXED-NEXT:         [  0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii,        750 ] (75.00%)
+INDEXED-NEXT:         [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
+INDEXED-NEXT:         [  1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii,        750 ] (75.00%)
+INDEXED-NEXT:         [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
+INDEXED-NEXT:     VTable Results:
+INDEXED-NEXT:         [  0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+INDEXED-NEXT:         [  0, _ZTV8Derived1,        250 ] (25.00%)
+INDEXED-NEXT:         [  1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+INDEXED-NEXT:         [  1, _ZTV8Derived1,        250 ] (25.00%)
+INDEXED-NEXT: Instrumentation level: IR  entry_first = 0
+INDEXED-NEXT: Functions shown: 1
+INDEXED-NEXT: Total functions: 6
+INDEXED-NEXT: Maximum function count: 1000
+INDEXED-NEXT: Maximum internal block count: 250
+INDEXED-NEXT: Statistics for indirect call sites profile:
+INDEXED-NEXT:   Total number of sites: 2
+INDEXED-NEXT:   Total number of sites with values: 2
+INDEXED-NEXT:   Total number of profiled values: 4
+INDEXED-NEXT:   Value sites histogram:
+INDEXED-NEXT:       NumTargets, SiteCount
+INDEXED-NEXT:       2, 2
+INDEXED-NEXT: Statistics for vtable profile:
+INDEXED-NEXT:   Total number of sites: 2
+INDEXED-NEXT:   Total number of sites with values: 2
+INDEXED-NEXT:   Total number of profiled values: 4
+INDEXED-NEXT:   Value sites histogram:
+INDEXED-NEXT:       NumTargets, SiteCount
+INDEXED-NEXT:       2, 2
+
+ICTEXT: :ir
+ICTEXT: main
+ICTEXT: # Func Hash:
+ICTEXT: 1124236338992350536
+ICTEXT: # Num Counters:
+ICTEXT: 2
+ICTEXT: # Counter Values:
+ICTEXT: 1000
+ICTEXT: 1
+ICTEXT: # Num Value Kinds:
+ICTEXT: 2
+ICTEXT: # ValueKind = IPVK_IndirectCallTarget:
+ICTEXT: 0
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750
+ICTEXT: _ZN8Derived15func1Eii:250
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750
+ICTEXT: _ZN8Derived15func2Eii:250
+ICTEXT: # ValueKind = IPVK_VTableTarget:
+ICTEXT: 2
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+ICTEXT: _ZTV8Derived1:250
+ICTEXT: 2
+ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750
+ICTEXT: _ZTV8Derived1:250
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext
new file mode 100644
index 00000000000000..38073916ec445b
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.proftext
@@ -0,0 +1,16 @@
+# RUN: llvm-profdata show --function=main --show-vtables %p/Inputs/vtable-value-prof.proftext | FileCheck %s
+
+# CHECK: Counters:
+# CHECK:  main:
+# CHECK:    Hash: 0x0f9a16fe6d398548
+# CHECK:    Counters: 2
+# CHECK:    VTable Results:
+# CHECK:	       [  0, /path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+# CHECK:	       [  0, _ZTV8Derived1,        250 ] (25.00%)
+# CHECK:	       [  1, /path/to/vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+# CHECK:	       [  1, _ZTV8Derived1,        250 ] (25.00%)
+# CHECK: Instrumentation level: IR  entry_first = 0
+# CHECK: Functions shown: 1
+# CHECK: Total functions: 6
+# CHECK: Maximum function count: 1000
+# CHECK: Maximum internal block count: 250
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 7754ca36125720..9fb56b8e2647e0 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -345,6 +345,9 @@ cl::opt<bool> ShowIndirectCallTargets(
     "ic-targets", cl::init(false),
     cl::desc("Show indirect call site target values for shown functions"),
     cl::sub(ShowSubcommand));
+cl::opt<bool> ShowVTables("show-vtables", cl::init(false),
+                          cl::desc("Show vtable names for shown functions"),
+                          cl::sub(ShowSubcommand));
 cl::opt<bool> ShowMemOPSizes(
     "memop-sizes", cl::init(false),
     cl::desc("Show the profiled sizes of the memory intrinsic calls "
@@ -722,6 +725,13 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     });
   }
 
+  const InstrProfSymtab &symtab = Reader->getSymtab();
+  const auto &VTableNames = symtab.getVTableNames();
+
+  for (const auto &kv : VTableNames) {
+    WC->Writer.addVTableName(kv.getKey());
+  }
+
   if (Reader->hasTemporalProfile()) {
     auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
     if (!Traces.empty())
@@ -1353,8 +1363,8 @@ remapSamples(const sampleprof::FunctionSamples &Samples,
                           BodySample.second.getSamples());
     for (const auto &Target : BodySample.second.getCallTargets()) {
       Result.addCalledTargetSamples(BodySample.first.LineOffset,
-                                    MaskedDiscriminator,
-                                    Remapper(Target.first), Target.second);
+                                    MaskedDiscriminator, Remapper(Target.first),
+                                    Target.second);
     }
   }
   for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
@@ -2817,6 +2827,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
         OS << "    Indirect Call Site Count: "
            << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
 
+      if (ShowVTables)
+        OS << "    Number of instrumented vtables: "
+           << Func.getNumValueSites(IPVK_VTableTarget) << "\n";
+
       uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
       if (ShowMemOPSizes && NumMemOPCalls > 0)
         OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
@@ -2838,6 +2852,13 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
                               &(Reader->getSymtab()));
       }
 
+      if (ShowVTables) {
+        OS << "    VTable Results:\n";
+        traverseAllValueSites(Func, IPVK_VTableTarget,
+                              VPStats[IPVK_VTableTarget], OS,
+                              &(Reader->getSymtab()));
+      }
+
       if (ShowMemOPSizes && NumMemOPCalls > 0) {
         OS << "    Memory Intrinsic Size Results:\n";
         traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
@@ -2886,6 +2907,11 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
                         VPStats[IPVK_IndirectCallTarget]);
   }
 
+  if (ShownFunctions && ShowVTables) {
+    OS << "Statistics for vtable profile:\n";
+    showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]);
+  }
+
   if (ShownFunctions && ShowMemOPSizes) {
     OS << "Statistics for memory intrinsic calls sizes profile:\n";
     showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 8ffb68de7a2d20..b007a374c2cf2c 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -638,32 +638,78 @@ TEST_F(InstrProfTest, test_irpgo_read_deprecated_names) {
       Succeeded());
 }
 
+// callee1 to callee6 are from vtable1 to vtable6 respectively.
 static const char callee1[] = "callee1";
 static const char callee2[] = "callee2";
 static const char callee3[] = "callee3";
 static const char callee4[] = "callee4";
 static const char callee5[] = "callee5";
 static const char callee6[] = "callee6";
+// callee7 and callee8 are not from any vtables.
+static const char callee7[] = "callee7";
+static const char callee8[] = "callee8";
+// 'callee' is primarily used to create multiple-element vtables.
+static const char callee[] = "callee";
+static const uint64_t vtable1[] = {uint64_t(callee), uint64_t(callee1)};
+static const uint64_t vtable2[] = {uint64_t(callee2), uint64_t(callee)};
+static const uint64_t vtable3[] = {
+    uint64_t(callee),
+    uint64_t(callee3),
+};
+static const uint64_t vtable4[] = {uint64_t(callee4), uint64_t(callee)};
+static const uint64_t vtable5[] = {uint64_t(callee5), uint64_t(callee)};
+static const uint64_t vtable6[] = {uint64_t(callee6), uint64_t(callee)};
+
+// Returns the address of the vtable slot that holds the numbered callee.
+static uint64_t getCalleeAddress(const uint64_t *vtableAddr) {
+  uint64_t CalleeAddr;
+  // The numbered callee is the 2nd element (byte offset 8) in vtable1 and
+  // vtable3, and the 1st element (offset 0) in the rest of the vtables.
+  if (vtableAddr == vtable1 || vtableAddr == vtable3)
+    CalleeAddr = uint64_t(vtableAddr) + 8;
+  else
+    CalleeAddr = uint64_t(vtableAddr);
+  return CalleeAddr;
+}
 
-TEST_P(InstrProfReaderWriterTest, icall_data_read_write) {
+TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) {
   NamedInstrProfRecord Record1("caller", 0x1234, {1, 2});
 
-  // 4 value sites.
-  Record1.reserveSites(IPVK_IndirectCallTarget, 4);
-  InstrProfValueData VD0[] = {
-      {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}};
-  Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
-  // No value profile data at the second site.
-  Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
-  InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}};
-  Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
-  InstrProfValueData VD3[] = {{(uint64_t)callee1, 1}};
-  Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+  // 4 indirect call value sites.
+  {
+    Record1.reserveSites(IPVK_IndirectCallTarget, 4);
+    InstrProfValueData VD0[] = {
+        {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}};
+    Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
+    // No value profile data at the second site.
+    Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+    InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}};
+    Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
+    InstrProfValueData VD3[] = {{(uint64_t)callee7, 1}, {(uint64_t)callee8, 2}};
+    Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
+  }
+
+  // 2 vtable value sites.
+  {
+    InstrProfValueData VD0[] = {
+        {getCalleeAddress(vtable1), 1},
+        {getCalleeAddress(vtable2), 2},
+        {getCalleeAddress(vtable3), 3},
+    };
+    InstrProfValueData VD2[] = {
+        {getCalleeAddress(vtable1), 1},
+        {getCalleeAddress(vtable2), 2},
+    };
+    Record1.addValueData(IPVK_VTableTarget, 0, VD0, 3, nullptr);
+    Record1.addValueData(IPVK_VTableTarget, 2, VD2, 2, nullptr);
+  }
 
   Writer.addRecord(std::move(Record1), getProfWeight(), Err);
   Writer.addRecord({"callee1", 0x1235, {3, 4}}, Err);
   Writer.addRecord({"callee2", 0x1235, {3, 4}}, Err);
   Writer.addRecord({"callee3", 0x1235, {3, 4}}, Err);
+  Writer.addRecord({"callee7", 0x1235, {3, 4}}, Err);
+  Writer.addRecord({"callee8", 0x1235, {3, 4}}, Err);
 
   // Set writer value prof data endianness.
   Writer.setValueProfDataEndianness(getEndianness());
@@ -676,24 +722,66 @@ TEST_P(InstrProfReaderWriterTest, icall_data_read_write) {
 
   Expected<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
   ASSERT_THAT_ERROR(R.takeError(), Succeeded());
+
+  // Test the number of instrumented indirect call sites and the number of
+  // profiled values at each site.
   ASSERT_EQ(4U, R->getNumValueSites(IPVK_IndirectCallTarget));
   EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
   EXPECT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
   EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
-  EXPECT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+  EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+
+  // Test the number of instrumented vtable sites and the number of profiled
+  // values at each site.
+  ASSERT_EQ(2U, R->getNumValueSites(IPVK_VTableTarget));
+  EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_VTableTarget, 0));
+  EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_VTableTarget, 1));
+
+  // First indirect site.
+  {
+    uint64_t TotalC;
+    std::unique_ptr<InstrProfValueData[]> VD =
+        R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC);
+
+    EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
+    EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
+    EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
+    EXPECT_EQ(6U * getProfWeight(), TotalC);
+
+    EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
+    EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
+    EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+  }
 
-  uint64_t TotalC;
-  std::unique_ptr<InstrProfValueData[]> VD =
-      R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC);
+  // First vtable site.
+  {
+    uint64_t TotalC;
+    std::unique_ptr<InstrProfValueData[]> VD =
+        R->getValueForSite(IPVK_VTableTarget, 0, &TotalC);
+
+    EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
+    EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
+    EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
+    EXPECT_EQ(6U * getProfWeight(), TotalC);
 
-  EXPECT_EQ(3U * getProfWeight(), VD[0].Count);
-  EXPECT_EQ(2U * getProfWeight(), VD[1].Count);
-  EXPECT_EQ(1U * getProfWeight(), VD[2].Count);
-  EXPECT_EQ(6U * getProfWeight(), TotalC);
+    EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable3));
+    EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable2));
+    EXPECT_EQ(VD[2].Value, getCalleeAddress(vtable1));
+  }
 
-  EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
-  EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
-  EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+  // Second vtable site.
+  {
+    uint64_t TotalC;
+    std::unique_ptr<InstrProfValueData[]> VD =
+        R->getValueForSite(IPVK_VTableTarget, 1, &TotalC);
+
+    EXPECT_EQ(2U * getProfWeight(), VD[0].Count);
+    EXPECT_EQ(1U * getProfWeight(), VD[1].Count);
+    EXPECT_EQ(3U * getProfWeight(), TotalC);
+
+    EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable2));
+    EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable1));
+  }
 }
 
 INSTANTIATE_TEST_SUITE_P(
@@ -801,33 +889,53 @@ TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) {
   ASSERT_EQ(1U, ValueData[3].Count);
 }
 
-TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
+TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) {
   static const char caller[] = "caller";
   NamedInstrProfRecord Record11(caller, 0x1234, {1, 2});
   NamedInstrProfRecord Record12(caller, 0x1234, {1, 2});
 
-  // 5 value sites.
-  Record11.reserveSites(IPVK_IndirectCallTarget, 5);
-  InstrProfValueData VD0[] = {{uint64_t(callee1), 1},
-                              {uint64_t(callee2), 2},
-                              {uint64_t(callee3), 3},
-                              {uint64_t(callee4), 4}};
-  Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
+  // 5 value sites for indirect calls.
+  {
+    Record11.reserveSites(IPVK_IndirectCallTarget, 5);
+    InstrProfValueData VD0[] = {{uint64_t(callee1), 1},
+                                {uint64_t(callee2), 2},
+                                {uint64_t(callee3), 3},
+                                {uint64_t(callee4), 4}};
+    Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
 
-  // No value profile data at the second site.
-  Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+    // No value profile data at the second site.
+    Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
 
-  InstrProfValueData VD2[] = {
-      {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
-  Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
+    InstrProfValueData VD2[] = {
+        {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
+    Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
 
-  InstrProfValueData VD3[] = {{uint64_t(callee1), 1}};
-  Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+    InstrProfValueData VD3[] = {{uint64_t(callee7), 1}, {uint64_t(callee8), 2}};
+    Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
 
-  InstrProfValueData VD4[] = {{uint64_t(callee1), 1},
-                              {uint64_t(callee2), 2},
-                              {uint64_t(callee3), 3}};
-  Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr);
+    InstrProfValueData VD4[] = {
+        {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
+    Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr);
+  }
+  // 3 value sites for vtables (site indices 0, 1 and 2).
+  {
+    Record11.reserveSites(IPVK_VTableTarget, 3);
+    InstrProfValueData VD0[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3},
+                                {getCalleeAddress(vtable4), 4}};
+    Record11.addValueData(IPVK_VTableTarget, 0, VD0, 4, nullptr);
+
+    InstrProfValueData VD2[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record11.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr);
+
+    InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record11.addValueData(IPVK_VTableTarget, 2, VD4, 3, nullptr);
+  }
 
   // A different record for the same caller.
   Record12.reserveSites(IPVK_IndirectCallTarget, 5);
@@ -843,11 +951,28 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
 
   Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr);
 
-  InstrProfValueData VD42[] = {{uint64_t(callee1), 1},
-                               {uint64_t(callee2), 2},
-                               {uint64_t(callee3), 3}};
+  InstrProfValueData VD42[] = {
+      {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
   Record12.addValueData(IPVK_IndirectCallTarget, 4, VD42, 3, nullptr);
 
+  // 3 value sites for vtables (site indices 0, 1 and 2).
+  {
+    Record12.reserveSites(IPVK_VTableTarget, 3);
+    InstrProfValueData VD0[] = {{getCalleeAddress(vtable2), 5},
+                                {getCalleeAddress(vtable3), 3}};
+    Record12.addValueData(IPVK_VTableTarget, 0, VD0, 2, nullptr);
+
+    InstrProfValueData VD2[] = {{getCalleeAddress(vtable2), 1},
+                                {getCalleeAddress(vtable3), 3},
+                                {getCalleeAddress(vtable4), 4}};
+    Record12.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr);
+
+    InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1},
+                                {getCalleeAddress(vtable2), 2},
+                                {getCalleeAddress(vtable3), 3}};
+    Record12.addValueData(IPVK_VTableTarget, 2, VD4, 3, nullptr);
+  }
+
   Writer.addRecord(std::move(Record11), Err);
   // Merge profile data.
   Writer.addRecord(std::move(Record12), Err);
@@ -857,53 +982,99 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) {
   Writer.addRecord({callee3, 0x1235, {3, 4}}, Err);
   Writer.addRecord({callee3, 0x1235, {3, 4}}, Err);
   Writer.addRecord({callee4, 0x1235, {3, 5}}, Err);
+  Writer.addRecord({callee7, 0x1235, {3, 5}}, Err);
+  Writer.addRecord({callee8, 0x1235, {3, 5}}, Err);
   auto Profile = Writer.writeBuffer();
   readProfile(std::move(Profile));
 
+  // Test the number of instrumented value sites and the number of profiled
+  // values for each site.
   Expected<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
   EXPECT_THAT_ERROR(R.takeError(), Succeeded());
+  // For indirect calls.
   ASSERT_EQ(5U, R->getNumValueSites(IPVK_IndirectCallTarget));
   ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
   ASSERT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
   ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
-  ASSERT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+  ASSERT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
   ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 4));
+  // For vtables.
+  ASSERT_EQ(3U, R->getNumValueSites(IPVK_VTableTarget));
+  ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_VTableTarget, 0));
+  ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_VTableTarget, 1));
+  ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_VTableTarget, 2));
+
+  // Test the merged values for indirect calls.
+  {
+    std::unique_ptr<InstrProfValueData[]> VD =
+        R->getValueForSite(IPVK_IndirectCallTarget, 0);
+    EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2"));
+    EXPECT_EQ(7U, VD[0].Count);
+    EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3"));
+    EXPECT_EQ(6U, VD[1].Count);
+    EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4"));
+    EXPECT_EQ(4U, VD[2].Count);
+    EXPECT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1"));
+    EXPECT_EQ(1U, VD[3].Count);
+
+    std::unique_ptr<InstrProfValueData[]> VD_2(
+        R->getValueForSite(IPVK_IndirectCallTarget, 2));
+    EXPECT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3"));
+    EXPECT_EQ(6U, VD_2[0].Count);
+    EXPECT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4"));
+    EXPECT_EQ(4U, VD_2[1].Count);
+    EXPECT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2"));
+    EXPECT_EQ(3U, VD_2[2].Count);
+    EXPECT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1"));
+    EXPECT_EQ(1U, VD_2[3].Count);
+
+    std::unique_ptr<InstrProfValueData[]> VD_3(
+        R->getValueForSite(IPVK_IndirectCallTarget, 3));
+    EXPECT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee8"));
+    EXPECT_EQ(2U, VD_3[0].Count);
+    EXPECT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee7"));
+    EXPECT_EQ(1U, VD_3[1].Count);
+
+    std::unique_ptr<InstrProfValueData[]> VD_4(
+        R->getValueForSite(IPVK_IndirectCallTarget, 4));
+    EXPECT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3"));
+    EXPECT_EQ(6U, VD_4[0].Count);
+    EXPECT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2"));
+    EXPECT_EQ(4U, VD_4[1].Count);
+    EXPECT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1"));
+    EXPECT_EQ(2U, VD_4[2].Count);
+  }
 
-  std::unique_ptr<InstrProfValueData[]> VD =
-      R->getValueForSite(IPVK_IndirectCallTarget, 0);
-  ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(7U, VD[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(6U, VD[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4"));
-  ASSERT_EQ(4U, VD[2].Count);
-  ASSERT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(1U, VD[3].Count);
-
-  std::unique_ptr<InstrProfValueData[]> VD_2(
-      R->getValueForSite(IPVK_IndirectCallTarget, 2));
-  ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(6U, VD_2[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4"));
-  ASSERT_EQ(4U, VD_2[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(3U, VD_2[2].Count);
-  ASSERT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(1U, VD_2[3].Count);
-
-  std::unique_ptr<InstrProfValueData[]> VD_3(
-      R->getValueForSite(IPVK_IndirectCallTarget, 3));
-  ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(1U, VD_3[0].Count);
-
-  std::unique_ptr<InstrProfValueData[]> VD_4(
-      R->getValueForSite(IPVK_IndirectCallTarget, 4));
-  ASSERT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(6U, VD_4[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(4U, VD_4[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(2U, VD_4[2].Count);
+  // Test the merged values for vtables
+  {
+    auto VD0 = R->getValueForSite(IPVK_VTableTarget, 0);
+    EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2));
+    EXPECT_EQ(VD0[0].Count, 7U);
+    EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3));
+    EXPECT_EQ(VD0[1].Count, 6U);
+    EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable4));
+    EXPECT_EQ(VD0[2].Count, 4U);
+    EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable1));
+    EXPECT_EQ(VD0[3].Count, 1U);
+
+    auto VD1 = R->getValueForSite(IPVK_VTableTarget, 1);
+    EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable3));
+    EXPECT_EQ(VD1[0].Count, 6U);
+    EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable4));
+    EXPECT_EQ(VD1[1].Count, 4U);
+    EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable2));
+    EXPECT_EQ(VD1[2].Count, 3U);
+    EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable1));
+    EXPECT_EQ(VD1[3].Count, 1U);
+
+    auto VD2 = R->getValueForSite(IPVK_VTableTarget, 2);
+    EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable3));
+    EXPECT_EQ(VD2[0].Count, 6U);
+    EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable2));
+    EXPECT_EQ(VD2[1].Count, 4U);
+    EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable1));
+    EXPECT_EQ(VD2[2].Count, 2U);
+  }
 }
 
 struct ValueProfileMergeEdgeCaseTest
@@ -1027,30 +1198,62 @@ INSTANTIATE_TEST_SUITE_P(
     EdgeCaseTest, ValueProfileMergeEdgeCaseTest,
     ::testing::Combine(::testing::Bool(), /* Sparse */
                        ::testing::Values(IPVK_IndirectCallTarget,
-                                         IPVK_MemOPSize) /* ValueKind */
+                                         IPVK_MemOPSize,
+                                         IPVK_VTableTarget) /* ValueKind */
                        ));
 
 static void addValueProfData(InstrProfRecord &Record) {
-  Record.reserveSites(IPVK_IndirectCallTarget, 5);
-  InstrProfValueData VD0[] = {{uint64_t(callee1), 400},
-                              {uint64_t(callee2), 1000},
-                              {uint64_t(callee3), 500},
-                              {uint64_t(callee4), 300},
-                              {uint64_t(callee5), 100}};
-  Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr);
-  InstrProfValueData VD1[] = {{uint64_t(callee5), 800},
-                              {uint64_t(callee3), 1000},
-                              {uint64_t(callee2), 2500},
-                              {uint64_t(callee1), 1300}};
-  Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr);
-  InstrProfValueData VD2[] = {{uint64_t(callee6), 800},
-                              {uint64_t(callee3), 1000},
-                              {uint64_t(callee4), 5500}};
-  Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
-  InstrProfValueData VD3[] = {{uint64_t(callee2), 1800},
-                              {uint64_t(callee3), 2000}};
-  Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
-  Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr);
+  // Add test data for indirect calls.
+  {
+    Record.reserveSites(IPVK_IndirectCallTarget, 6);
+    InstrProfValueData VD0[] = {{uint64_t(callee1), 400},
+                                {uint64_t(callee2), 1000},
+                                {uint64_t(callee3), 500},
+                                {uint64_t(callee4), 300},
+                                {uint64_t(callee5), 100}};
+    Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr);
+    InstrProfValueData VD1[] = {{uint64_t(callee5), 800},
+                                {uint64_t(callee3), 1000},
+                                {uint64_t(callee2), 2500},
+                                {uint64_t(callee1), 1300}};
+    Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr);
+    InstrProfValueData VD2[] = {{uint64_t(callee6), 800},
+                                {uint64_t(callee3), 1000},
+                                {uint64_t(callee4), 5500}};
+    Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
+    InstrProfValueData VD3[] = {{uint64_t(callee2), 1800},
+                                {uint64_t(callee3), 2000}};
+    Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr);
+    Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr);
+    InstrProfValueData VD5[] = {{uint64_t(callee7), 1234},
+                                {uint64_t(callee8), 5678}};
+    Record.addValueData(IPVK_IndirectCallTarget, 5, VD5, 2, nullptr);
+  }
+
+  // Add test data for vtables
+  {
+    Record.reserveSites(IPVK_VTableTarget, 4);
+    InstrProfValueData VD0[] = {
+        {getCalleeAddress(vtable1), 400}, {getCalleeAddress(vtable2), 1000},
+        {getCalleeAddress(vtable3), 500}, {getCalleeAddress(vtable4), 300},
+        {getCalleeAddress(vtable5), 100},
+    };
+    InstrProfValueData VD1[] = {{getCalleeAddress(vtable5), 800},
+                                {getCalleeAddress(vtable3), 1000},
+                                {getCalleeAddress(vtable2), 2500},
+                                {getCalleeAddress(vtable1), 1300}};
+    InstrProfValueData VD2[] = {
+        {getCalleeAddress(vtable6), 800},
+        {getCalleeAddress(vtable3), 1000},
+        {getCalleeAddress(vtable4), 5500},
+    };
+    InstrProfValueData VD3[] = {{getCalleeAddress(vtable2), 1800},
+                                {getCalleeAddress(vtable3), 2000}};
+    Record.addValueData(IPVK_VTableTarget, 0, VD0, 5, nullptr);
+    Record.addValueData(IPVK_VTableTarget, 1, VD1, 4, nullptr);
+    Record.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr);
+    Record.addValueData(IPVK_VTableTarget, 3, VD3, 2, nullptr);
+  }
 }
 
 TEST(ValueProfileReadWriteTest, value_prof_data_read_write) {
@@ -1063,59 +1266,111 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) {
   VPData->deserializeTo(Record, nullptr);
 
   // Now read data from Record and sanity check the data
-  ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget));
+  ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget));
   ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
   ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
   ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
   ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
   ASSERT_EQ(0U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 4));
+  ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 5));
 
   auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
     return VD1.Count > VD2.Count;
   };
+
   std::unique_ptr<InstrProfValueData[]> VD_0(
       Record.getValueForSite(IPVK_IndirectCallTarget, 0));
   llvm::sort(&VD_0[0], &VD_0[5], Cmp);
-  ASSERT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(1000U, VD_0[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(500U, VD_0[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(400U, VD_0[2].Count);
-  ASSERT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4"));
-  ASSERT_EQ(300U, VD_0[3].Count);
-  ASSERT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5"));
-  ASSERT_EQ(100U, VD_0[4].Count);
+  EXPECT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2"));
+  EXPECT_EQ(1000U, VD_0[0].Count);
+  EXPECT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3"));
+  EXPECT_EQ(500U, VD_0[1].Count);
+  EXPECT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1"));
+  EXPECT_EQ(400U, VD_0[2].Count);
+  EXPECT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4"));
+  EXPECT_EQ(300U, VD_0[3].Count);
+  EXPECT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5"));
+  EXPECT_EQ(100U, VD_0[4].Count);
 
   std::unique_ptr<InstrProfValueData[]> VD_1(
       Record.getValueForSite(IPVK_IndirectCallTarget, 1));
   llvm::sort(&VD_1[0], &VD_1[4], Cmp);
-  ASSERT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(2500U, VD_1[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1"));
-  ASSERT_EQ(1300U, VD_1[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(1000U, VD_1[2].Count);
-  ASSERT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5"));
-  ASSERT_EQ(800U, VD_1[3].Count);
+  EXPECT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2"));
+  EXPECT_EQ(2500U, VD_1[0].Count);
+  EXPECT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1"));
+  EXPECT_EQ(1300U, VD_1[1].Count);
+  EXPECT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3"));
+  EXPECT_EQ(1000U, VD_1[2].Count);
+  EXPECT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5"));
+  EXPECT_EQ(800U, VD_1[3].Count);
 
   std::unique_ptr<InstrProfValueData[]> VD_2(
       Record.getValueForSite(IPVK_IndirectCallTarget, 2));
   llvm::sort(&VD_2[0], &VD_2[3], Cmp);
-  ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4"));
-  ASSERT_EQ(5500U, VD_2[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(1000U, VD_2[1].Count);
-  ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6"));
-  ASSERT_EQ(800U, VD_2[2].Count);
+  EXPECT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4"));
+  EXPECT_EQ(5500U, VD_2[0].Count);
+  EXPECT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3"));
+  EXPECT_EQ(1000U, VD_2[1].Count);
+  EXPECT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6"));
+  EXPECT_EQ(800U, VD_2[2].Count);
 
   std::unique_ptr<InstrProfValueData[]> VD_3(
       Record.getValueForSite(IPVK_IndirectCallTarget, 3));
   llvm::sort(&VD_3[0], &VD_3[2], Cmp);
-  ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3"));
-  ASSERT_EQ(2000U, VD_3[0].Count);
-  ASSERT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2"));
-  ASSERT_EQ(1800U, VD_3[1].Count);
+  EXPECT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3"));
+  EXPECT_EQ(2000U, VD_3[0].Count);
+  EXPECT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2"));
+  EXPECT_EQ(1800U, VD_3[1].Count);
+
+  ASSERT_EQ(4U, Record.getNumValueSites(IPVK_VTableTarget));
+  ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_VTableTarget, 0));
+  ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_VTableTarget, 1));
+  ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_VTableTarget, 2));
+  ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_VTableTarget, 3));
+
+  std::unique_ptr<InstrProfValueData[]> VD0(
+      Record.getValueForSite(IPVK_VTableTarget, 0));
+  llvm::sort(&VD0[0], &VD0[5], Cmp);
+  EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2));
+  EXPECT_EQ(VD0[0].Count, 1000U);
+  EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3));
+  EXPECT_EQ(VD0[1].Count, 500U);
+  EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable1));
+  EXPECT_EQ(VD0[2].Count, 400U);
+  EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable4));
+  EXPECT_EQ(VD0[3].Count, 300U);
+  EXPECT_EQ(VD0[4].Value, getCalleeAddress(vtable5));
+  EXPECT_EQ(VD0[4].Count, 100U);
+
+  std::unique_ptr<InstrProfValueData[]> VD1(
+      Record.getValueForSite(IPVK_VTableTarget, 1));
+  llvm::sort(&VD1[0], &VD1[4], Cmp);
+  EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2));
+  EXPECT_EQ(VD1[0].Count, 2500U);
+  EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable1));
+  EXPECT_EQ(VD1[1].Count, 1300U);
+  EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable3));
+  EXPECT_EQ(VD1[2].Count, 1000U);
+  EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable5));
+  EXPECT_EQ(VD1[3].Count, 800U);
+
+  std::unique_ptr<InstrProfValueData[]> VD2(
+      Record.getValueForSite(IPVK_VTableTarget, 2));
+  llvm::sort(&VD2[0], &VD2[3], Cmp);
+  EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4));
+  EXPECT_EQ(VD2[0].Count, 5500U);
+  EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable3));
+  EXPECT_EQ(VD2[1].Count, 1000U);
+  EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable6));
+  EXPECT_EQ(VD2[2].Count, 800U);
+
+  std::unique_ptr<InstrProfValueData[]> VD3(
+      Record.getValueForSite(IPVK_VTableTarget, 3));
+  llvm::sort(&VD3[0], &VD3[2], Cmp);
+  EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3));
+  EXPECT_EQ(VD3[0].Count, 2000U);
+  EXPECT_EQ(VD3[1].Value, getCalleeAddress(vtable2));
+  EXPECT_EQ(VD3[1].Count, 1800U);
 }
 
 TEST(ValueProfileReadWriteTest, symtab_mapping) {
@@ -1132,10 +1387,27 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) {
   Symtab.mapAddress(uint64_t(callee4), 0x4000ULL);
   // Missing mapping for callee5
 
+  auto getVTableStartAddr = [](const uint64_t *vtable) -> uint64_t {
+    return uint64_t(vtable);
+  };
+  auto getVTableEndAddr = [](const uint64_t *vtable) -> uint64_t {
+    return uint64_t(vtable) + 16;
+  };
+  // vtable1, vtable2, vtable3, vtable4 get mapped; vtable5, vtable6 are not
+  // mapped.
+  Symtab.mapVTableAddress(getVTableStartAddr(vtable1),
+                          getVTableEndAddr(vtable1), MD5Hash("vtable1"));
+  Symtab.mapVTableAddress(getVTableStartAddr(vtable2),
+                          getVTableEndAddr(vtable2), MD5Hash("vtable2"));
+  Symtab.mapVTableAddress(getVTableStartAddr(vtable3),
+                          getVTableEndAddr(vtable3), MD5Hash("vtable3"));
+  Symtab.mapVTableAddress(getVTableStartAddr(vtable4),
+                          getVTableEndAddr(vtable4), MD5Hash("vtable4"));
+
   VPData->deserializeTo(Record, &Symtab);
 
   // Now read data from Record and sanity check the data
-  ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget));
+  ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget));
   ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
 
   auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
@@ -1153,6 +1425,74 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) {
 
   // callee5 does not have a mapped value -- default to 0.
   ASSERT_EQ(VD_0[4].Value, 0ULL);
+
+  // Sanity check the vtable value data
+  ASSERT_EQ(4U, Record.getNumValueSites(IPVK_VTableTarget));
+
+  {
+    // The first vtable site.
+    std::unique_ptr<InstrProfValueData[]> VD(
+        Record.getValueForSite(IPVK_VTableTarget, 0));
+    ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_VTableTarget, 0));
+    llvm::sort(&VD[0], &VD[5], Cmp);
+    EXPECT_EQ(1000U, VD[0].Count);
+    EXPECT_EQ(VD[0].Value, MD5Hash("vtable2"));
+    EXPECT_EQ(500U, VD[1].Count);
+    EXPECT_EQ(VD[1].Value, MD5Hash("vtable3"));
+    EXPECT_EQ(VD[2].Value, MD5Hash("vtable1"));
+    EXPECT_EQ(400U, VD[2].Count);
+    EXPECT_EQ(VD[3].Value, MD5Hash("vtable4"));
+    EXPECT_EQ(300U, VD[3].Count);
+
+    // vtable5 isn't mapped -- default to 0.
+    EXPECT_EQ(VD[4].Value, 0U);
+    EXPECT_EQ(VD[4].Count, 100U);
+  }
+
+  {
+    // The second vtable site.
+    std::unique_ptr<InstrProfValueData[]> VD(
+        Record.getValueForSite(IPVK_VTableTarget, 1));
+    ASSERT_EQ(4, Record.getNumValueDataForSite(IPVK_VTableTarget, 1));
+    llvm::sort(&VD[0], &VD[4], Cmp);
+    EXPECT_EQ(VD[0].Value, MD5Hash("vtable2"));
+    EXPECT_EQ(2500U, VD[0].Count);
+    EXPECT_EQ(VD[1].Value, MD5Hash("vtable1"));
+    EXPECT_EQ(1300U, VD[1].Count);
+
+    EXPECT_EQ(VD[2].Value, MD5Hash("vtable3"));
+    EXPECT_EQ(1000U, VD[2].Count);
+    // vtable5 isn't mapped -- default to 0.
+    EXPECT_EQ(VD[3].Value, 0U);
+    EXPECT_EQ(800U, VD[3].Count);
+  }
+
+  {
+    // The third vtable site.
+    std::unique_ptr<InstrProfValueData[]> VD(
+        Record.getValueForSite(IPVK_VTableTarget, 2));
+    ASSERT_EQ(3, Record.getNumValueDataForSite(IPVK_VTableTarget, 2));
+    llvm::sort(&VD[0], &VD[3], Cmp);
+    EXPECT_EQ(5500U, VD[0].Count);
+    EXPECT_EQ(VD[0].Value, MD5Hash("vtable4"));
+    EXPECT_EQ(1000U, VD[1].Count);
+    EXPECT_EQ(VD[1].Value, MD5Hash("vtable3"));
+    // vtable6 isn't mapped -- default to 0.
+    EXPECT_EQ(VD[2].Value, 0U);
+    EXPECT_EQ(800U, VD[2].Count);
+  }
+
+  {
+    // The fourth vtable site.
+    std::unique_ptr<InstrProfValueData[]> VD(
+        Record.getValueForSite(IPVK_VTableTarget, 3));
+    ASSERT_EQ(2, Record.getNumValueDataForSite(IPVK_VTableTarget, 3));
+    llvm::sort(&VD[0], &VD[2], Cmp);
+    EXPECT_EQ(2000U, VD[0].Count);
+    EXPECT_EQ(VD[0].Value, MD5Hash("vtable3"));
+    EXPECT_EQ(1800U, VD[1].Count);
+    EXPECT_EQ(VD[1].Value, MD5Hash("vtable2"));
+  }
 }
 
 TEST_P(MaybeSparseInstrProfTest, get_max_function_count) {

>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 09:07:27 -0800
Subject: [PATCH 2/5] [InstrProf] Add vtables with type metadata into symtab to
 look it up with GUID

---
 llvm/include/llvm/ProfileData/InstrProf.h    | 19 +++++
 llvm/lib/ProfileData/InstrProf.cpp           | 87 ++++++++++++++------
 llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++
 3 files changed, 138 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 53108a093bf4dd..6e799cf8aa273e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -487,8 +487,25 @@ class InstrProfSymtab {
     return "** External Symbol **";
   }
 
+  // Returns the canonical name of the given PGOName by stripping the name
+  // suffixes that begin with ".". If MayHaveUniqueSuffix is true, the
+  // ".__uniq." suffix is kept in the canonical name.
+  StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+  // Add the function into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOFuncName), PGOFuncName>
+  // - <MD5Hash(PGOFuncName), F>
+  // - <MD5Hash(getCanonicalName(PGOFuncName)), F>
   Error addFuncWithName(Function &F, StringRef PGOFuncName);
 
+  // Add the vtable into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOVTableName), PGOVTableName>
+  // - <MD5Hash(PGOVTableName), V>
+  // - <MD5Hash(getCanonicalName(PGOVTableName)), V>
+  Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
   // If the symtab is created by a series of calls to \c addFuncName, \c
   // finalizeSymtab needs to be called before looking up function names.
   // This is required because the underlying map is a vector (for space
@@ -543,6 +560,7 @@ class InstrProfSymtab {
   Error create(const FuncNameIterRange &FuncIterRange,
                const VTableNameIterRange &VTableIterRange);
 
+  // Map the MD5 of the symbol name to the name.
   Error addSymbolName(StringRef SymbolName) {
     if (SymbolName.empty())
       return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() {
   if (Sorted)
     return;
   llvm::sort(MD5NameMap, less_first());
+  llvm::sort(MD5VTableMap, less_first());
   llvm::sort(MD5FuncMap, less_first());
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9ebcba10c860ff..a09a2bb0ba77cb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     Types.clear();
     G.getMetadata(LLVMContext::MD_type, Types);
     if (!Types.empty()) {
-      MD5VTableMap.emplace_back(G.getGUID(), &G);
+      if (Error E = addVTableWithName(
+              G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+        return E;
     }
   }
   Sorted = false;
@@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
   return Error::success();
 }
 
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+                                         StringRef VTablePGOName) {
+  if (Error E = addVTableName(VTablePGOName))
+    return E;
+
+  MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+  // NOTE: `-funique-internal-linkage-names` doesn't uniquify vtables, so no
+  // need to check ".__uniq."
+
+  // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+  // it will have `.llvm.` in its name. Use the name before externalization.
+  StringRef CanonicalName =
+      getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+  if (CanonicalName != VTablePGOName) {
+    if (Error E = addVTableName(CanonicalName))
+      return E;
+
+    MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+  }
+
+  return Error::success();
+}
+
 /// \c NameStrings is a string composed of one of more possibly encoded
 /// sub-strings. The substrings are separated by 0 or more zero bytes. This
 /// method decodes the string and calls `NameCallback` for each substring.
@@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
       std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
 }
 
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
-  if (Error E = addFuncName(PGOFuncName))
-    return E;
-  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+                                            bool MayHaveUniqueSuffix) {
+  size_t pos = 0;
   // In ThinLTO, local function may have been promoted to global and have
   // suffix ".llvm." added to the function name. We need to add the
   // stripped function name to the symbol table so that we can find a match
   // from profile.
   //
-  // We may have other suffixes similar as ".llvm." which are needed to
-  // be stripped before the matching, but ".__uniq." suffix which is used
-  // to differentiate internal linkage functions in different modules
-  // should be kept. Now this is the only suffix with the pattern ".xxx"
-  // which is kept before matching.
-  const std::string UniqSuffix = ".__uniq.";
-  auto pos = PGOFuncName.find(UniqSuffix);
-  // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
-  // search '.' from the beginning.
-  if (pos != std::string::npos)
-    pos += UniqSuffix.length();
-  else
-    pos = 0;
-  pos = PGOFuncName.find('.', pos);
-  if (pos != std::string::npos && pos != 0) {
-    StringRef OtherFuncName = PGOFuncName.substr(0, pos);
-    if (Error E = addFuncName(OtherFuncName))
+  // ".__uniq." suffix is used to differentiate internal linkage functions in
+  // different modules and should be kept. Now this is the only suffix with the
+  // pattern ".xxx" which is kept before matching; other suffixes similar to
+  // ".llvm." will be stripped.
+  if (MayHaveUniqueSuffix) {
+    const std::string UniqSuffix = ".__uniq.";
+    pos = PGOName.find(UniqSuffix);
+    if (pos != StringRef::npos)
+      pos += UniqSuffix.length();
+    else
+      pos = 0;
+  }
+
+  // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from
+  // the beginning.
+  pos = PGOName.find('.', pos);
+  if (pos != StringRef::npos && pos != 0)
+    return PGOName.substr(0, pos);
+
+  return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+  if (Error E = addFuncName(PGOFuncName))
+    return E;
+  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+  StringRef CanonicalName =
+      getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+  if (CanonicalName != PGOFuncName) {
+    if (Error E = addFuncName(CanonicalName))
       return E;
-    MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+    MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
   }
+
   return Error::success();
 }
 
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 4b99195c1b859a..edde544660e454 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
@@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
   Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
   Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
 
+  // [ptr, ptr, ptr]
+  ArrayType *VTableArrayType = ArrayType::get(
+      PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+      3);
+  Constant *Int32TyNull =
+      llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+  SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
+  StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+  // Create a vtable definition with external linkage.
+  GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::ExternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType, {llvm::ConstantArray::get(
+                          VTableArrayType,
+                          {Int32TyNull, Int32TyNull,
+                           Function::Create(FTy, Function::ExternalLinkage,
+                                            "VFuncInExternalGV", M.get())})}),
+      "ExternalGV");
+
+  // Create a vtable definition with local (internal) linkage.
+  GlobalVariable *LocalGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::InternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType,
+          {llvm::ConstantArray::get(
+              VTableArrayType, {Int32TyNull, Int32TyNull,
+                                Function::Create(FTy, Function::ExternalLinkage,
+                                                 "VFuncInLocalGV", M.get())})}),
+      "LocalGV");
+
+  // Add type metadata for the test data, since vtables with type metadata are
+  // added to symtab.
+  ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+  LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV"));
+
   InstrProfSymtab ProfSymtab;
   EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
 
@@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
     EXPECT_EQ(StringRef(PGOName), PGOFuncName);
     EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
   }
+
+  StringRef VTables[] = {"ExternalGV", "LocalGV"};
+  for (StringRef VTableName : VTables) {
+    GlobalVariable *GV =
+        M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
+
+    // Test that ProfSymtab returns the expected name given a hash.
+    std::string IRPGOName = getPGOName(*GV);
+    uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
+    EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
+    EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
+
+    // Test that ProfSymtab returns the expected global variable
+    EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
+  }
 }
 
 // Testing symtab serialization and creator/deserialization interface

>From 7ebae253ab1808bca328453f68af2b595d07176e Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 11:32:50 -0800
Subject: [PATCH 3/5] [NFC][CallPromotionUtils]Extract a helper function
 versionCallSiteWithCond from versionCallSite

---
 .../Transforms/Utils/CallPromotionUtils.cpp   | 36 +++++++++++--------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 4e84927f1cfc90..d0cf0792eface0 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -188,10 +188,9 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 /// Predicate and clone the given call site.
 ///
 /// This function creates an if-then-else structure at the location of the call
-/// site. The "if" condition compares the call site's called value to the given
-/// callee. The original call site is moved into the "else" block, and a clone
-/// of the call site is placed in the "then" block. The cloned instruction is
-/// returned.
+/// site. The "if" condition is specified by `Cond`.
+/// The original call site is moved into the "else" block, and a clone of the
+/// call site is placed in the "then" block. The cloned instruction is returned.
 ///
 /// For example, the call instruction below:
 ///
@@ -202,7 +201,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 /// Is replace by the following:
 ///
 ///   orig_bb:
-///     %cond = icmp eq i32 ()* %ptr, @func
 ///     br i1 %cond, %then_bb, %else_bb
 ///
 ///   then_bb:
@@ -232,7 +230,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 /// Is replace by the following:
 ///
 ///   orig_bb:
-///     %cond = icmp eq i32 ()* %ptr, @func
 ///     br i1 %cond, %then_bb, %else_bb
 ///
 ///   then_bb:
@@ -267,7 +264,6 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 /// Is replaced by the following:
 ///
 ///   cond_bb:
-///     %cond = icmp eq i32 ()* %ptr, @func
 ///     br i1 %cond, %then_bb, %orig_bb
 ///
 ///   then_bb:
@@ -280,19 +276,13 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 ///     ; The original call instruction stays in its original block.
 ///     %t0 = musttail call i32 %ptr()
 ///     ret %t0
-CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
-                                MDNode *BranchWeights) {
+static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
+                                         MDNode *BranchWeights) {
 
   IRBuilder<> Builder(&CB);
   CallBase *OrigInst = &CB;
   BasicBlock *OrigBlock = OrigInst->getParent();
 
-  // Create the compare. The called value and callee must have the same type to
-  // be compared.
-  if (CB.getCalledOperand()->getType() != Callee->getType())
-    Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
-  auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
-
   if (OrigInst->isMustTailCall()) {
     // Create an if-then structure. The original instruction stays in its block,
     // and a clone of the original instruction is placed in the "then" block.
@@ -380,6 +370,22 @@ CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
   return *NewInst;
 }
 
+// Predicate and clone the given call site using the condition `CB.callee ==
+// Callee`. See the comment on `versionCallSiteWithCond` for the transformation.
+CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
+                                MDNode *BranchWeights) {
+
+  IRBuilder<> Builder(&CB);
+
+  // Create the compare. The called value and callee must have the same type to
+  // be compared.
+  if (CB.getCalledOperand()->getType() != Callee->getType())
+    Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
+  auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
+
+  return versionCallSiteWithCond(CB, Cond, BranchWeights);
+}
+
 bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
                             const char **FailureReason) {
   assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");

>From ac5dc1bf77b67cbf0aa5e2c8fb6a7ce0080fb501 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sat, 10 Feb 2024 12:03:25 -0800
Subject: [PATCH 4/5] [CallPromotionUtils]Implement conditional indirect call
 promotion with vtable-based comparison

---
 .../Transforms/Utils/CallPromotionUtils.h     |  50 ++++++-
 .../Transforms/Utils/CallPromotionUtils.cpp   |  64 ++++++++-
 .../Utils/CallPromotionUtilsTest.cpp          | 127 ++++++++++++++++++
 3 files changed, 233 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
index fcb384ec361339..5f3a71206876c6 100644
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -14,10 +14,17 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
 #define LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
 
+#include <cstdint>
+
+#include "llvm/ADT/ArrayRef.h"
+
 namespace llvm {
+class Constant;
 class CallBase;
 class CastInst;
 class Function;
+class GlobalVariable;
+class Instruction;
 class MDNode;
 class Value;
 
@@ -41,7 +48,9 @@ bool isLegalToPromote(const CallBase &CB, Function *Callee,
 CallBase &promoteCall(CallBase &CB, Function *Callee,
                       CastInst **RetBitCast = nullptr);
 
-/// Promote the given indirect call site to conditionally call \p Callee.
+/// Promote the given indirect call site to conditionally call \p Callee. The
+/// promoted direct call instruction is predicated on `CB.getCalledOperand() ==
+/// Callee`.
 ///
 /// This function creates an if-then-else structure at the location of the call
 /// site. The original call site is moved into the "else" block. A clone of the
@@ -51,6 +60,31 @@ CallBase &promoteCall(CallBase &CB, Function *Callee,
 CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
                                     MDNode *BranchWeights = nullptr);
 
+/// This is similar to `promoteCallWithIfThenElse` except that the condition to
+/// promote a virtual call is that \p VPtr is the same as any of \p
+/// AddressPoints.
+///
+/// This function is expected to be used on virtual calls (a subset of indirect
+/// calls). \p VPtr is the virtual table address stored in the objects, and
+/// \p AddressPoints contains address points of vtables to be compared with.
+///
+/// It's the responsibility of the caller to guarantee the transformation
+/// correctness (by specifying \p VPtr and \p AddressPoints properly).
+///
+/// This function doesn't sink the address-calculation instructions of indirect
+/// callee to the indirect call fallback. The subsequent passes (e.g.
+/// inst-combine) should sink them if possible and handle the sink of debug
+/// intrinsics together.
+CallBase &promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr,
+                                   Function *Callee,
+                                   ArrayRef<Constant *> AddressPoints,
+                                   MDNode *BranchWeights);
+
+/// Returns a constant representing the vtable's address point specified by the
+/// offset. Caller should ensure \p AddressPointOffset is valid.
+Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+                                      uint32_t AddressPointOffset);
+
 /// Try to promote (devirtualize) a virtual call on an Alloca. Return true on
 /// success.
 ///
@@ -74,13 +108,17 @@ CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
 ///
 bool tryPromoteCall(CallBase &CB);
 
+/// Predicate and clone the given call site using the given condition.
+CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
+                                  MDNode *BranchWeights);
+
 /// Predicate and clone the given call site.
 ///
-/// This function creates an if-then-else structure at the location of the call
-/// site. The "if" condition compares the call site's called value to the given
-/// callee. The original call site is moved into the "else" block, and a clone
-/// of the call site is placed in the "then" block. The cloned instruction is
-/// returned.
+/// This function creates an if-then-else structure at the location of the
+/// call site. The "if" condition compares the call site's called value to
+/// the given callee. The original call site is moved into the "else" block,
+/// and a clone of the call site is placed in the "then" block. The cloned
+/// instruction is returned.
 CallBase &versionCallSite(CallBase &CB, Value *Callee, MDNode *BranchWeights);
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index d0cf0792eface0..ea855b9a4d8416 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -12,9 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -185,6 +187,24 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
     U->replaceUsesOfWith(&CB, Cast);
 }
 
+// Returns a value that ORs together all the given icmp results.
+static Value *getOrResult(const SmallVector<Value *, 2> &ICmps,
+                          IRBuilder<> &Builder) {
+  assert(!ICmps.empty() && "Must have at least one icmp instructions");
+  if (ICmps.size() == 1)
+    return ICmps[0];
+
+  SmallVector<Value *, 2> OrResults;
+  int i = 0, NumICmp = ICmps.size();
+  for (i = 0; i + 1 < NumICmp; i += 2)
+    OrResults.push_back(Builder.CreateOr(ICmps[i], ICmps[i + 1], "icmp-or"));
+
+  if (i < NumICmp)
+    OrResults.push_back(ICmps[i]);
+
+  return getOrResult(OrResults, Builder);
+}
+
 /// Predicate and clone the given call site.
 ///
 /// This function creates an if-then-else structure at the location of the call
@@ -276,8 +296,8 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 ///     ; The original call instruction stays in its original block.
 ///     %t0 = musttail call i32 %ptr()
 ///     ret %t0
-static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond,
-                                         MDNode *BranchWeights) {
+CallBase &llvm::versionCallSiteWithCond(CallBase &CB, Value *Cond,
+                                        MDNode *BranchWeights) {
 
   IRBuilder<> Builder(&CB);
   CallBase *OrigInst = &CB;
@@ -565,6 +585,46 @@ CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
   return promoteCall(NewInst, Callee);
 }
 
+Constant *llvm::getVTableAddressPointOffset(GlobalVariable *VTable,
+                                            uint32_t AddressPointOffset) {
+  Module &M = *VTable->getParent();
+  const DataLayout &DL = M.getDataLayout();
+  LLVMContext &Context = M.getContext();
+  Type *VTableType = VTable->getValueType();
+  assert(AddressPointOffset < DL.getTypeAllocSize(VTableType) &&
+         "Out-of-bound access");
+  APInt AddressPointOffsetAPInt(32, AddressPointOffset, false);
+  SmallVector<APInt> Indices =
+      DL.getGEPIndicesForOffset(VTableType, AddressPointOffsetAPInt);
+  SmallVector<llvm::Constant *> GEPIndices;
+  for (const auto &Index : Indices)
+    GEPIndices.push_back(llvm::ConstantInt::get(Type::getInt32Ty(Context),
+                                                Index.getZExtValue()));
+
+  return ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(), VTable,
+                                                GEPIndices);
+}
+
+CallBase &llvm::promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr,
+                                         Function *Callee,
+                                         ArrayRef<Constant *> AddressPoints,
+                                         MDNode *BranchWeights) {
+  assert(!AddressPoints.empty() && "Caller should guarantee");
+  IRBuilder<> Builder(&CB);
+  SmallVector<Value *, 2> ICmps;
+  for (auto &AddressPoint : AddressPoints)
+    ICmps.push_back(Builder.CreateICmpEQ(VPtr, AddressPoint));
+
+  Value *Cond = getOrResult(ICmps, Builder);
+
+  // Version the indirect call site. If Cond is true, 'NewInst' will be
+  // executed, otherwise the original call site will be executed.
+  CallBase &NewInst = versionCallSiteWithCond(CB, Cond, BranchWeights);
+
+  // Promote 'NewInst' so that it directly calls the desired function.
+  return promoteCall(NewInst, Callee);
+}
+
 bool llvm::tryPromoteCall(CallBase &CB) {
   assert(!CB.getCalledFunction());
   Module *M = CB.getCaller()->getParent();
diff --git a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
index eff8e27d36d641..c57abb54e46849 100644
--- a/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp
@@ -8,9 +8,12 @@
 
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 
@@ -368,3 +371,127 @@ declare %struct2 @_ZN4Impl3RunEv(%class.Impl* %this)
   bool IsPromoted = tryPromoteCall(*CI);
   EXPECT_FALSE(IsPromoted);
 }
+
+TEST(CallPromotionUtilsTest, getVTableAddressPointOffset) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C,
+                                      R"IR(
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }
+
+declare i32 @_ZN5Base15func1Ev(ptr)
+declare i32 @_ZN5Base25func2Ev(ptr)
+declare i32 @_ZN5Base15func0Ev(ptr)
+declare void @_ZN5Base35func3Ev(ptr)
+)IR");
+  GlobalVariable *GV = M->getGlobalVariable("_ZTV8Derived2");
+
+  for (auto [AddressPointOffset, Index] :
+       {std::pair{16, 0}, {40, 1}, {64, 2}}) {
+    Constant *AddressPoint =
+        getVTableAddressPointOffset(GV, AddressPointOffset);
+
+    ConstantExpr *GEP = dyn_cast<ConstantExpr>(AddressPoint);
+    ASSERT_TRUE(GEP);
+    SmallVector<Constant *> Indices = {
+        llvm::ConstantInt::get(Type::getInt32Ty(C), 0U),
+        llvm::ConstantInt::get(Type::getInt32Ty(C), Index),
+        llvm::ConstantInt::get(Type::getInt32Ty(C), 2U)};
+    EXPECT_EQ(GEP, ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(),
+                                                          GV, Indices));
+  }
+}
+
+TEST(CallPromotionUtilsTest, promoteCallWithVTableCmp) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C,
+                                      R"IR(
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV5Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !0
+ at _ZTV8Derived1 = constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev], [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !1, !type !2, !type !3
+ at _ZTV5Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base25func2Ev] }, !type !2
+ at _ZTV8Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN5Base25func2Ev], [4 x ptr] [ptr inttoptr (i64 -16 to ptr), ptr null, ptr @_ZN5Base15func0Ev, ptr @_ZN5Base15func1Ev] }, !type !4, !type !5, !type !6, !type !7
+ at _ZTV5Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base35func3Ev] }, !type !6
+
+define i32 @testfunc(ptr %d) {
+entry:
+  %vtable = load ptr, ptr %d, !prof !8
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS5Base1")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  %call = tail call i32 %1(ptr %d), !prof !9
+  ret i32 %call
+}
+
+define i32 @_ZN5Base15func1Ev(ptr %this) {
+entry:
+  ret i32 2
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @_ZN5Base25func2Ev(ptr)
+declare i32 @_ZN5Base15func0Ev(ptr)
+declare void @_ZN5Base35func3Ev(ptr)
+
+!0 = !{i64 16, !"_ZTS5Base1"}
+!1 = !{i64 16, !"_ZTS5Base1"}
+!2 = !{i64 48, !"_ZTS5Base2"}
+!3 = !{i64 16, !"_ZTS8Derived1"}
+!4 = !{i64 64, !"_ZTS5Base1"}
+!5 = !{i64 40, !"_ZTS5Base2"}
+!6 = !{i64 16, !"_ZTS5Base3"}
+!7 = !{i64 16, !"_ZTS8Derived2"}
+!8 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 800, i64 5035968517245772950, i64 500, i64 3215870116411581797, i64 300}
+!9 = !{!"VP", i32 0, i64 1600, i64 6804820478065511155, i64 1600})IR");
+
+  Function *F = M->getFunction("testfunc");
+  ASSERT_TRUE(F);
+  CallInst *CI = dyn_cast<CallInst>(&*std::next(F->front().rbegin()));
+  ASSERT_TRUE(CI && CI->isIndirectCall());
+
+  LoadInst *FuncPtr = dyn_cast<LoadInst>(CI->getCalledOperand());
+  ASSERT_TRUE(FuncPtr);
+
+  GetElementPtrInst *GEP =
+      dyn_cast<GetElementPtrInst>(FuncPtr->getPointerOperand());
+  ASSERT_TRUE(GEP);
+
+  LoadInst *VPtr = dyn_cast<LoadInst>(&*F->front().begin());
+
+  Function *Callee = M->getFunction("_ZN5Base15func1Ev");
+
+  // Create the constant and the branch weights
+  SmallVector<Constant *, 3> VTableAddressPoints;
+
+  for (auto &[VTableName, AddressPointOffset] : {std::pair{"_ZTV5Base1", 16},
+                                                 {"_ZTV8Derived1", 16},
+                                                 {"_ZTV8Derived2", 64}})
+    VTableAddressPoints.push_back(getVTableAddressPointOffset(
+        M->getGlobalVariable(VTableName), AddressPointOffset));
+
+  MDBuilder MDB(C);
+  MDNode *BranchWeights = MDB.createBranchWeights(1600, 0);
+
+  size_t OrigEntryBBSize = F->front().size();
+
+  // Tests that promoted direct call is returned.
+  CallBase &DirectCB = promoteCallWithVTableCmp(
+      *CI, VPtr, Callee, VTableAddressPoints, BranchWeights);
+  EXPECT_EQ(DirectCB.getCalledOperand(), Callee);
+
+  // Tests that GEP and FuncPtr are not sunk and remain in the entry block.
+  BasicBlock *EntryBB = &F->front();
+  EXPECT_EQ(EntryBB, GEP->getParent());
+  EXPECT_EQ(EntryBB, FuncPtr->getParent());
+
+  // Promotion inserts 3 icmp instructions and 2 or instructions, and removes
+  // 1 call instruction from the entry block.
+  EXPECT_EQ(F->front().size(), OrigEntryBBSize + 4);
+}

>From 29d9cd2f128da0adde011a0a8362ec252104c901 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sat, 10 Feb 2024 15:21:49 -0800
Subject: [PATCH 5/5] [TypeProf][IndirectCallPromotion]Implement vtable-based
 transformation

---
 .../Analysis/IndirectCallPromotionAnalysis.h  |   2 +-
 .../IndirectCallPromotionAnalysis.cpp         |   6 +-
 .../Instrumentation/IndirectCallPromotion.cpp | 391 +++++++++++++++++-
 .../Transforms/PGOProfile/icp_vtable_cmp.ll   | 206 +++++++++
 .../PGOProfile/icp_vtable_invoke.ll           | 201 +++++++++
 .../PGOProfile/icp_vtable_tail_call.ll        |  92 +++++
 6 files changed, 876 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll

diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index 8a05e913a91063..eda672d7d50ee2 100644
--- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -57,7 +57,7 @@ class ICallPromotionAnalysis {
   ///
   /// The returned array space is owned by this class, and overwritten on
   /// subsequent calls.
-  ArrayRef<InstrProfValueData>
+  MutableArrayRef<InstrProfValueData>
   getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals,
                                        uint64_t &TotalCount,
                                        uint32_t &NumCandidates);
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ab53717eb889a0..643c155ba6d7e3 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -87,7 +87,7 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
   return I;
 }
 
-ArrayRef<InstrProfValueData>
+MutableArrayRef<InstrProfValueData>
 ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
     const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount,
     uint32_t &NumCandidates) {
@@ -96,8 +96,8 @@ ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
                                ValueDataArray.get(), NumVals, TotalCount);
   if (!Res) {
     NumCandidates = 0;
-    return ArrayRef<InstrProfValueData>();
+    return MutableArrayRef<InstrProfValueData>();
   }
   NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount);
-  return ArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
+  return MutableArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
 }
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 6a44a32bb34dc9..85af3d7cc56b7a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,13 +13,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
 #include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
@@ -51,6 +54,8 @@ using namespace llvm;
 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
 
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
 // Command line option to disable indirect-call promotion with the default as
 // false. This is for debug purpose.
 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
@@ -103,13 +108,71 @@ static cl::opt<bool>
     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
                  cl::desc("Dump IR after transformation happens"));
 
+// This option is meant to be used by LLVM regression tests to exercise the
+// transformation that compares vtables.
+// TODO: ICP pass will do cost-benefit analysis between function-based
+// comparison and vtable-based comparison and choose one of the two
+// transformations.
+static cl::opt<bool> ICPEnableVTableCmp(
+    "icp-enable-vtable-cmp", cl::init(false), cl::Hidden,
+    cl::desc("If ThinLTO and WPD is enabled and this option is true, "
+             "indirect-call promotion pass will compare vtables rather than "
+             "functions for speculative devirtualization of virtual calls."
+             " If set to false, indirect-call promotion pass will always "
+             "compare functions."));
+
 namespace {
 
+using VTableAddressPointOffsetValMap =
+    SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>;
+
+// A struct to collect type information for a virtual call site.
+struct VirtualCallSiteInfo {
+  // The offset from the address point to virtual function in the vtable.
+  uint64_t FunctionOffset;
+  // The instruction that computes the address point of vtable.
+  Instruction *VPtr;
+  // The compatible type used in LLVM type intrinsics.
+  StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+    SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+
+// Given the list of compatible type metadata for a vtable and one specified
+// type, returns the address point offset of the type if any.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const ArrayRef<MDNode *> &Types,
+                        StringRef CompatibleType) {
+  if (Types.empty()) {
+    return std::nullopt;
+  }
+  std::optional<uint64_t> Offset;
+  // Find the offset whose type string equals the one in the llvm.type.test
+  // intrinsic.
+  for (MDNode *Type : Types) {
+    auto TypeIDMetadata = Type->getOperand(1).get();
+    if (auto *TypeId = dyn_cast<MDString>(TypeIDMetadata)) {
+      StringRef TypeStr = TypeId->getString();
+      if (TypeStr != CompatibleType) {
+        continue;
+      }
+      Offset = cast<ConstantInt>(
+                   cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+                   ->getZExtValue();
+      break;
+    }
+  }
+  return Offset;
+}
+
 // Promote indirect calls to conditional direct calls, keeping track of
 // thresholds.
 class IndirectCallPromoter {
 private:
   Function &F;
+  Module &M;
 
   // Symtab that maps indirect call profile values to function names and
   // defines.
@@ -117,6 +180,11 @@ class IndirectCallPromoter {
 
   const bool SamplePGO;
 
+  // A map from a virtual call to its type information.
+  const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
+
+  VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
+
   OptimizationRemarkEmitter &ORE;
 
   // A struct that records the direct target and it's call count.
@@ -124,9 +192,17 @@ class IndirectCallPromoter {
     Function *const TargetFunction;
     const uint64_t Count;
 
+    uint64_t FunctionOffset;
+
+    SmallVector<std::pair<uint64_t, uint64_t>, 2> VTableGUIDAndCounts;
+
+    SmallVector<Constant *, 2> AddressPoints;
+
     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
   };
 
+  using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 4>;
+
   // Check if the indirect-call call site should be promoted. Return the number
   // of promotions. Inst is the candidate indirect call, ValueDataRef
   // contains the array of value profile data for profiled targets,
@@ -134,7 +210,8 @@ class IndirectCallPromoter {
   // NumCandidates is the number of candidate entries in ValueDataRef.
   std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
       const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
-      uint64_t TotalCount, uint32_t NumCandidates);
+      uint64_t TotalCount, uint32_t NumCandidates,
+      VTableGUIDCountsMap &VTableGUIDCounts);
 
   // Promote a list of targets for one indirect-call callsite by comparing
   // indirect callee with functions. Returns true if there are IR
@@ -144,10 +221,33 @@ class IndirectCallPromoter {
       uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
       uint32_t NumCandidates);
 
+  bool tryToPromoteWithVTableCmp(
+      CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+      uint64_t TotalFuncCount, uint32_t NumCandidates,
+      MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+      VTableGUIDCountsMap &VTableGUIDCounts);
+
+  void
+  tryGetVTableInfos(const CallBase &CB,
+                    const SmallDenseMap<Function *, int, 4> &CalleeIndexMap,
+                    VTableGUIDCountsMap &VTableGUIDCounts,
+                    std::vector<PromotionCandidate> &Candidates);
+
+  Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
+                                             uint64_t AddressPointOffset);
+
+  bool isProfitableToCompareVTables(
+      const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount);
+
 public:
-  IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
-                       OptimizationRemarkEmitter &ORE)
-      : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+  IndirectCallPromoter(
+      Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
+      const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
+      VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
+      OptimizationRemarkEmitter &ORE)
+      : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
+        VirtualCSInfo(VirtualCSInfo),
+        VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
   IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
 
@@ -161,9 +261,12 @@ class IndirectCallPromoter {
 std::vector<IndirectCallPromoter::PromotionCandidate>
 IndirectCallPromoter::getPromotionCandidatesForCallSite(
     const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
-    uint64_t TotalCount, uint32_t NumCandidates) {
+    uint64_t TotalCount, uint32_t NumCandidates,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
   std::vector<PromotionCandidate> Ret;
 
+  SmallDenseMap<Function *, int, 4> CalleeIndexMap;
+
   LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
                     << " Num_targets: " << ValueDataRef.size()
                     << " Num_candidates: " << NumCandidates << "\n");
@@ -237,30 +340,114 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
       break;
     }
 
+    CalleeIndexMap[TargetFunction] = Ret.size();
     Ret.push_back(PromotionCandidate(TargetFunction, Count));
+
     TotalCount -= Count;
   }
+
+  if (!ICPEnableVTableCmp)
+    return Ret;
+
+  tryGetVTableInfos(CB, CalleeIndexMap, VTableGUIDCounts, Ret);
+
+  return Ret;
+}
+
+// Returns the address point constant for `GV` at `AddressPointOffset` from
+// the per-module cache, computing and caching it on a miss.
+Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
+    GlobalVariable *GV, uint64_t AddressPointOffset) {
+  Constant *&Ret = VTableAddressPointOffsetVal[GV][AddressPointOffset];
+  if (!Ret)
+    Ret = getVTableAddressPointOffset(GV, AddressPointOffset);
   return Ret;
 }
 
+// Collects the vtable profiles of the virtual call `CB` for its candidates.
+// Every profiled vtable is recorded in `GUIDCountsMap` with its count. If the
+// vtable has a definition, is compatible with the call site's type, and its
+// slot for this call holds a function in `CalleeIndexMap`, the vtable's
+// <GUID, count> pair and address point are appended to that candidate.
+void IndirectCallPromoter::tryGetVTableInfos(
+    const CallBase &CB, const SmallDenseMap<Function *, int, 4> &CalleeIndexMap,
+    VTableGUIDCountsMap &GUIDCountsMap,
+    std::vector<PromotionCandidate> &Candidates) {
+  if (!ICPEnableVTableCmp)
+    return;
+
+  auto CSInfoIter = VirtualCSInfo.find(&CB);
+  if (CSInfoIter == VirtualCSInfo.end())
+    return;
+  const VirtualCallSiteInfo &CSInfo = CSInfoIter->second;
+
+  // The vtable value profiles are read from the instruction in `VPtr`.
+  uint32_t NumVTables = 0;
+  uint64_t TotalVTableCount = 0;
+  auto VTableProfiles = getValueProfDataFromInst(
+      *CSInfo.VPtr, IPVK_VTableTarget, MaxNumVTableAnnotations, NumVTables,
+      TotalVTableCount);
+  if (!VTableProfiles.get())
+    return;
+
+  SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable.
+  for (uint32_t I = 0; I < NumVTables; ++I) {
+    const uint64_t VTableGUID = VTableProfiles[I].Value;
+    const uint64_t VTableCount = VTableProfiles[I].Count;
+    GUIDCountsMap[VTableGUID] = VTableCount;
+    GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableGUID);
+    if (!VTableVar) {
+      LLVM_DEBUG(dbgs() << "\tCannot find vtable definition for " << VTableGUID
+                        << "\n");
+      continue;
+    }
+
+    Types.clear();
+    VTableVar->getMetadata(LLVMContext::MD_type, Types);
+    const std::optional<uint64_t> AddressPointOffset =
+        getCompatibleTypeOffset(Types, CSInfo.CompatibleTypeStr);
+    if (!AddressPointOffset)
+      continue;
+
+    // Find the function stored in this vtable's slot for the call.
+    Function *Callee = nullptr;
+    std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
+        VTableVar, *AddressPointOffset + CSInfo.FunctionOffset,
+        *(F.getParent()));
+    if (!Callee)
+      continue;
+
+    auto IndexIter = CalleeIndexMap.find(Callee);
+    if (IndexIter == CalleeIndexMap.end())
+      continue;
+
+    PromotionCandidate &Candidate = Candidates[IndexIter->second];
+    Candidate.VTableGUIDAndCounts.push_back({VTableGUID, VTableCount});
+    Candidate.AddressPoints.push_back(
+        getOrCreateVTableAddressPointVar(VTableVar, *AddressPointOffset));
+  }
+}
+
+// Creates branch weights from two counts scaled by one common factor.
+static MDNode *getBranchWeights(LLVMContext &Context, uint64_t IfCount,
+                                uint64_t ElseCount) {
+  const uint64_t Scale = calculateCountScale(std::max(IfCount, ElseCount));
+  return MDBuilder(Context).createBranchWeights(
+      scaleBranchCount(IfCount, Scale), scaleBranchCount(ElseCount, Scale));
+}
+
 CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
                                          uint64_t Count, uint64_t TotalCount,
                                          bool AttachProfToDirectCall,
                                          OptimizationRemarkEmitter *ORE) {
-
-  uint64_t ElseCount = TotalCount - Count;
-  uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
-  uint64_t Scale = calculateCountScale(MaxCount);
-  MDBuilder MDB(CB.getContext());
-  MDNode *BranchWeights = MDB.createBranchWeights(
-      scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+  MDNode *BranchWeights =
+      getBranchWeights(CB.getContext(), Count, TotalCount - Count);
 
   CallBase &NewInst =
       promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
 
-  if (AttachProfToDirectCall) {
+  if (AttachProfToDirectCall)
     setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
-  }
 
   using namespace ore;
 
@@ -304,6 +491,80 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
   return Changed;
 }
 
+// Promotes `CB` to each candidate behind a vtable comparison and subtracts the
+// promoted counts from the value profiles of `CB` and of its vtable pointer.
+// `CB` must have an entry in `VirtualCSInfo`. Returns true if any candidate
+// was promoted.
+bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
+    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t TotalFuncCount, uint32_t NumCandidates,
+    MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+    VTableGUIDCountsMap &VTableGUIDCounts) {
+  Instruction *VPtr = VirtualCSInfo.at(&CB).VPtr;
+
+  // Profile counts are 64-bit, so the promoted counts must not be narrowed.
+  SmallVector<uint64_t, 4> PromotedFuncCount;
+  for (const auto &Candidate : Candidates) {
+    uint64_t IfCount = 0;
+    // FIXME: Skip vtables with cold count in the comparison.
+    for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) {
+      IfCount += Count;
+      VTableGUIDCounts[GUID] -= Count;
+    }
+
+    promoteCallWithVTableCmp(
+        CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
+        getBranchWeights(CB.getContext(), IfCount, TotalFuncCount - IfCount));
+
+    PromotedFuncCount.push_back(IfCount);
+    TotalFuncCount -= IfCount;
+    NumOfPGOICallPromotion++;
+  }
+
+  if (PromotedFuncCount.empty())
+    return false;
+
+  // A comparator that sorts value profile data descendingly.
+  auto Cmp = [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
+    return LHS.Count > RHS.Count;
+  };
+
+  // Update the indirect call value profiles. Nothing is re-annotated if the
+  // whole count of the call site has been promoted.
+  CB.setMetadata(LLVMContext::MD_prof, nullptr);
+  if (TotalFuncCount != 0) {
+    for (size_t I = 0; I < PromotedFuncCount.size(); I++)
+      ICallProfDataRef[I].Count -= PromotedFuncCount[I];
+
+    llvm::sort(ICallProfDataRef.begin(), ICallProfDataRef.end(), Cmp);
+    // Locate the first <target, count> pair where the count is zero or less.
+    auto UB = llvm::upper_bound(
+        ICallProfDataRef, 0U,
+        [](uint64_t Count, const InstrProfValueData &ProfData) {
+          return ProfData.Count <= Count;
+        });
+
+    ArrayRef<InstrProfValueData> VDs(ICallProfDataRef.begin(), UB);
+    annotateValueSite(M, CB, VDs, TotalFuncCount, IPVK_IndirectCallTarget,
+                      NumCandidates);
+  }
+
+  // Update the vtable value profiles with the counts left after promotion.
+  VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
+  std::vector<InstrProfValueData> VTableValueProfiles;
+  uint64_t TotalVTableCount = 0;
+  for (auto [GUID, Count] : VTableGUIDCounts) {
+    if (Count == 0)
+      continue;
+    VTableValueProfiles.push_back({GUID, Count});
+    TotalVTableCount += Count;
+  }
+  llvm::sort(VTableValueProfiles, Cmp);
+  annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
+                    IPVK_VTableTarget, VTableValueProfiles.size());
+  return true;
+}
+
 // Traverse all the indirect-call callsite and get the value profile
 // annotation to perform indirect-call promotion.
 bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
@@ -317,14 +578,96 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
     if (!NumCandidates ||
         (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
       continue;
+    VTableGUIDCountsMap VTableGUIDCounts;
     auto PromotionCandidates = getPromotionCandidatesForCallSite(
-        *CB, ICallProfDataRef, TotalCount, NumCandidates);
-    Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
-                                       ICallProfDataRef, NumCandidates);
+        *CB, ICallProfDataRef, TotalCount, NumCandidates, VTableGUIDCounts);
+
+    if (isProfitableToCompareVTables(PromotionCandidates, TotalCount))
+      Changed |= tryToPromoteWithVTableCmp(*CB, PromotionCandidates, TotalCount,
+                                           NumCandidates, ICallProfDataRef,
+                                           VTableGUIDCounts);
+    else
+      Changed |= tryToPromoteWithFuncCmp(*CB, PromotionCandidates, TotalCount,
+                                         ICallProfDataRef, NumCandidates);
   }
   return Changed;
 }
 
+bool IndirectCallPromoter::isProfitableToCompareVTables(
+    const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) {
+  // Compare vtables only if every candidate has address points to compare.
+  // FIXME: Implement cost-benefit analysis in a follow-up change.
+  return ICPEnableVTableCmp && !Candidates.empty() &&
+         llvm::none_of(Candidates,
+                       [](const auto &C) { return C.AddressPoints.empty(); });
+}
+
+// Populates `VirtualCSInfo` with an entry for every devirtualizable call site
+// found through the llvm.type.test intrinsic calls in `M`, provided that the
+// instruction producing the call's vtable pointer can be identified.
+static void
+computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
+                                  VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
+  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+    return FAM.getResult<DominatorTreeAnalysis>(F);
+  };
+
+  // Right now only llvm.type.test is used to find out virtual call sites.
+  // With ThinLTO and whole-program-devirtualization, llvm.type.test and
+  // llvm.public.type.test are emitted, and llvm.public.type.test is either
+  // refined to llvm.type.test or dropped before indirect-call-promotion pass.
+  //
+  // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
+  // Find out virtual calls by looking at users of llvm.type.checked.load in
+  // that case.
+  Function *TypeTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+  if (!TypeTestFunc || TypeTestFunc->use_empty())
+    return;
+
+  // Iterate all type.test calls and find all indirect calls.
+  for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+    auto *CI = dyn_cast<CallInst>(U.getUser());
+    if (!CI)
+      continue;
+
+    // Use dyn_cast so that the null check is meaningful; cast<> asserts on a
+    // type mismatch and never returns null.
+    auto *TypeMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!TypeMDVal)
+      continue;
+
+    auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+    if (!CompatibleTypeId)
+      continue;
+    StringRef CompatibleTypeStr = CompatibleTypeId->getString();
+
+    // Find out all devirtualizable call sites given a llvm.type.test intrinsic
+    // call.
+    SmallVector<DevirtCallSite, 1> DevirtCalls;
+    SmallVector<CallInst *, 1> Assumes;
+    auto &DT = LookupDomTree(*CI->getFunction());
+    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+
+    for (auto &DevirtCall : DevirtCalls) {
+      CallBase &CB = DevirtCall.CB;
+      // This is the offset from the address point offset to the virtual
+      // function.
+      uint64_t Offset = DevirtCall.Offset;
+
+      // Given an indirect call, try to find the instruction which loads a
+      // pointer to the virtual table.
+      Instruction *VTablePtr =
+          PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
+      if (!VTablePtr)
+        continue;
+
+      VirtualCSInfo[&CB] = {Offset, VTablePtr, CompatibleTypeStr};
+    }
+  }
+}
+
 // A wrapper function that does the actual work.
 static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
                                  bool SamplePGO, ModuleAnalysisManager &MAM) {
@@ -337,6 +680,17 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
     return false;
   }
   bool Changed = false;
+  VirtualCallSiteTypeInfoMap VirtualCSInfo;
+
+  computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
+
+  // This map records states across functions in an LLVM IR module.
+  // IndirectCallPromoter processes one
+  // function at a time and updates this map with new entries the first time
+  // the entry is needed in the module; the subsequent functions could re-use
+  // map entries inserted when processing prior functions.
+  VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
+
   for (auto &F : M) {
     if (F.isDeclaration() || F.hasOptNone())
       continue;
@@ -345,7 +699,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
         MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
-    IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
+    IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
+                                      VTableAddressPointOffsetVal, ORE);
     bool FuncChanged = CallPromoter.processFunction(PSI);
     if (ICPDUMPAFTER && FuncChanged) {
       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
new file mode 100644
index 00000000000000..75eda4b66be025
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -0,0 +1,206 @@
+
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+
+; Invoke instcombine after pgo-icall-prom so the address calculation instructions for virtual calls get sunk into the basic block for the indirect fallback.
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV4Base = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+ at _ZTV8Derived1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+ at _ZTV8Derived2 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+ at _ZTV8Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived35func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !3
+
+; Test the IR transformation from function-based indirect-call promotion and vtable-based indirect-call promotion.
+
+; The tested function has one function candidate which comes from one vtable.
+define i32 @test_one_function_one_vtable(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_one_vtable(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_one_vtable(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    br i1 [[TMP1]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_targ:
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP3]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP]]
+; ICALL-VTABLE:       if.end.icp:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP2]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !4
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  %call = tail call i32 %1(ptr %d), !prof !5
+  ret i32 %call
+}
+
+; The tested function has one function candidate which comes from two vtables.
+define i32 @test_one_function_two_vtables(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_two_vtables(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_two_vtables(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived1, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    [[ICMP_OR:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; ICALL-VTABLE-NEXT:    br i1 [[ICMP_OR]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4]]
+; ICALL-VTABLE:       if.true.direct_targ:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP4]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP]]
+; ICALL-VTABLE:       if.end.icp:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP5]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !6
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  %call = tail call i32 %1(ptr %d), !prof !5
+  ret i32 %call
+}
+
+; The tested function has one function candidate which comes from three vtables.
+define i32 @test_one_function_three_vtables(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_three_vtables(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF7:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_three_vtables(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived1, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV8Derived2, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [4 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    [[ICMP_OR:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; ICALL-VTABLE-NEXT:    [[ICMP_OR1:%.*]] = or i1 [[ICMP_OR]], [[TMP3]]
+; ICALL-VTABLE-NEXT:    br i1 [[ICMP_OR1]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4]]
+; ICALL-VTABLE:       if.true.direct_targ:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[VFN:%.*]] = getelementptr inbounds i8, ptr [[VTABLE]], i64 8
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP5]](ptr nonnull [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP]]
+; ICALL-VTABLE:       if.end.icp:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP4]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP6]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !7
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  %call = tail call i32 %1(ptr %d), !prof !5
+  ret i32 %call
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
+declare i32 @_ZN4Base5func1Ei(ptr, i32)
+declare i32 @_ZN8Derived15func1Ei(ptr, i32)
+declare i32 @_ZN8Derived25func1Ei(ptr, i32)
+declare i32 @_ZN8Derived35func1Ei(ptr, i32)
+
+define i32 @_ZN4Base5func2Ev(ptr %this) {
+entry:
+  ret i32 0
+}
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTS8Derived1"}
+!2 = !{i64 16, !"_ZTS8Derived2"}
+!3 = !{i64 16, !"_ZTS8Derived3"}
+!4 = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+!5 = !{!"VP", i32 0, i64 1600, i64 -3104805163612457913, i64 1600}
+!6 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 1000, i64 5035968517245772950, i64 600}
+!7 = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 600, i64 5035968517245772950, i64 550, i64 1960855528937986108, i64 450}
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 1600, i32 0}
+; ICALL-FUNC: [[PROF6]] = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 1000, i64 5035968517245772950, i64 600}
+; ICALL-FUNC: [[PROF7]] = !{!"VP", i32 2, i64 1600, i64 -9064381665493407289, i64 600, i64 5035968517245772950, i64 550, i64 1960855528937986108, i64 450}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 1600, i32 0}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
new file mode 100644
index 00000000000000..a2924420fd2a06
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=pgo-icall-prom -S  | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.Error = type { i8 }  ; one-byte struct; used for the %e alloca in @_Z4testP4Base
+
+@_ZTI5Error = dso_local constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr null, i64 2), ptr null }  ; object named by the catch clause and by llvm.eh.typeid.for in @_Z4testP4Base
+@_ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1  ; vtable: the function pointer sits in slot 2, i.e. at the 16-byte offset named by its !type entries
+@_ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3  ; vtable: same layout as @_ZTV4Base, with two extra !type entries (!2, !3)
+
+@.str = private unnamed_addr constant [15 x i8] c"out of tickets\00"  ; passed to @_ZN5ErrorC1EPKci by both get_ticket definitions
+
+define i32 @_Z4testP4Base(ptr %b) personality ptr @__gxx_personality_v0 {  ; test input: a single indirect call through %b, written as an invoke that unwinds to %lpad
+; ICALL-FUNC-LABEL: define i32 @_Z4testP4Base(
+; ICALL-FUNC-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived10get_ticketEv
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr [[B]])
+; ICALL-FUNC-NEXT:            to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base10get_ticketEv
+; ICALL-FUNC-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr [[B]])
+; ICALL-FUNC-NEXT:            to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-FUNC:       if.false.orig_indirect2:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = invoke i32 [[TMP1]](ptr [[B]])
+; ICALL-FUNC-NEXT:            to label [[IF_END_ICP3]] unwind label [[LPAD]]
+; ICALL-FUNC:       if.end.icp3:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    br label %try.cont
+; ICALL-FUNC:       lpad:
+
+;
+; ICALL-VTABLE-LABEL: define i32 @_Z4testP4Base(
+; ICALL-VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV7Derived, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_targ:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT:            to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_targ1:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT:            to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-VTABLE:       if.false.orig_indirect2:
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = invoke i32 [[TMP1]](ptr nonnull [[B]])
+; ICALL-VTABLE-NEXT:            to label [[IF_END_ICP3]] unwind label [[LPAD]]
+; ICALL-VTABLE:       if.end.icp3:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-VTABLE-NEXT:    br label [[IF_END_ICP]]
+; ICALL-VTABLE:       if.end.icp:
+; ICALL-VTABLE-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-VTABLE-NEXT:    br label %try.cont
+; ICALL-VTABLE:       lpad:
+;
+entry:
+  %e = alloca %class.Error
+  %vtable = load ptr, ptr %b, !prof !4  ; vtable pointer load; carries the kind-2 value-profile record !4
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %1 = load ptr, ptr %vtable  ; function pointer read at offset 0 from the loaded vtable address
+  %call = invoke i32 %1(ptr %b)
+  to label %try.cont unwind label %lpad, !prof !5  ; !5 is the kind-0 value-profile record for this indirect invoke
+
+lpad:
+  %2 = landingpad { ptr, i32 }
+  cleanup
+  catch ptr @_ZTI5Error
+  %3 = extractvalue { ptr, i32 } %2, 1
+  %4 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
+  %matches = icmp eq i32 %3, %4
+  br i1 %matches, label %catch, label %ehcleanup  ; %catch only when the selector equals the id for @_ZTI5Error
+
+catch:
+  %5 = extractvalue { ptr, i32 } %2, 0
+
+  %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1) %e)
+  to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]  ; merges the catch-path value with the normal result of the indirect invoke
+  ret i32 %ret.0
+
+lpad1:
+  %6 = landingpad { ptr, i32 }
+  cleanup
+  invoke void @__cxa_end_catch()
+  to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:
+  br label %ehcleanup
+
+ehcleanup:
+  %lpad.val7.merged = phi { ptr, i32 } [ %6, %invoke.cont4 ], [ %2, %lpad ]
+  resume { ptr, i32 } %lpad.val7.merged
+
+terminate.lpad:
+  %7 = landingpad { ptr, i32 }
+  catch ptr null
+  %8 = extractvalue { ptr, i32 } %7, 0
+  unreachable  ; reached only if the @__cxa_end_catch invoke in %lpad1 unwinds
+}
+
+declare i1 @llvm.type.test(ptr, metadata)  ; intrinsic called on the loaded vtable pointer in @_Z4testP4Base
+declare void @llvm.assume(i1 noundef)  ; intrinsic that consumes the llvm.type.test result
+declare i32 @__gxx_personality_v0(...)  ; personality routine named by all three function definitions in this test
+declare i32 @llvm.eh.typeid.for(ptr)  ; its result is compared with the landingpad selector in %lpad of @_Z4testP4Base
+
+declare i32 @_ZN5Error10error_codeEv(ptr nonnull align 1 dereferenceable(1))  ; invoked from the %catch block of @_Z4testP4Base
+
+declare void @__cxa_end_catch()
+
+define i32 @_ZN4Base10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {  ; returns the value of @_Z13get_ticket_idv unless that value is -1
+entry:
+  %call = tail call i32 @_Z13get_ticket_idv()
+  %cmp.not = icmp eq i32 %call, -1
+  br i1 %cmp.not, label %if.end, label %if.then  ; -1 selects the %if.end path
+
+if.then:
+  ret i32 %call
+
+if.end:
+  %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+  invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 1)
+  to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable  ; no further call follows the one above; the continuation is simply marked unreachable
+
+lpad:
+  %0 = landingpad { ptr, i32 }
+  cleanup
+  resume { ptr, i32 } %0  ; cleanup-only pad: resumes with the landingpad value unchanged
+}
+
+define i32 @_ZN7Derived10get_ticketEv(ptr %this) align 2 personality ptr @__gxx_personality_v0 {  ; same shape as @_ZN4Base10get_ticketEv; only the i32 constant passed in %if.end differs
+entry:
+  %call = tail call i32 @_Z13get_ticket_idv()
+  %cmp.not = icmp eq i32 %call, -1
+  br i1 %cmp.not, label %if.end, label %if.then  ; -1 selects the %if.end path
+
+if.then:
+  ret i32 %call
+
+if.end:
+  %exception = tail call ptr @__cxa_allocate_exception(i64 1)
+  invoke void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1) %exception, ptr nonnull @.str, i32 2)
+  to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable  ; no further call follows the one above; the continuation is simply marked unreachable
+
+lpad:
+  %0 = landingpad { ptr, i32 }
+  cleanup
+  resume { ptr, i32 } %0  ; cleanup-only pad: resumes with the landingpad value unchanged
+}
+
+declare i32 @_Z13get_ticket_idv()  ; called first by both get_ticket definitions
+declare ptr @__cxa_allocate_exception(i64)  ; called with size 1 on the -1 path of both get_ticket definitions
+declare void @_ZN5ErrorC1EPKci(ptr nonnull align 1 dereferenceable(1), ptr, i32)  ; invoked with the allocation, @.str and a constant (1 or 2)
+
+!0 = !{i64 16, !"_ZTS4Base"}  ; !0-!3: (byte offset, type id) pairs used as !type operands on @_ZTV4Base (!0, !1) and @_ZTV7Derived (!0-!3)
+!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFivE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}  ; kind-2 record on the vtable load; the expected output spells the first hash as signed -4576307468236080025 (same 64-bit pattern)
+!5 = !{!"VP", i32 0, i64 1600, i64 14811317294552474744, i64 900, i64 9261744921105590125, i64 700}  ; kind-0 record on the indirect invoke: two candidates, 900 + 700 of 1600
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 -4576307468236080025, i64 900, i64 1960855528937986108, i64 700}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 700, i32 0}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
new file mode 100644
index 00000000000000..94ed588c5458d8
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
+; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks=pgo-icall-prom -S 2>&1 | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes='pgo-icall-prom,instcombine' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -S 2>&1 | FileCheck %s --check-prefix=ICALL-VTABLE
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, align 8, !type !0, !type !1, !type !2, !type !3  ; vtable: the function pointer sits in slot 2, i.e. at the 16-byte offset named by its !type entries
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, align 8, !type !0, !type !1  ; vtable: same layout, with only the two Base !type entries
+
+define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) {  ; test input: a single indirect musttail call through %ptr, forwarding all three arguments
+; ICALL-FUNC-LABEL: define i32 @test_tail_call(
+; ICALL-FUNC-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8, !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN7Derived5func1Eii
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT:    ret i32 [[TMP3]]
+; ICALL-FUNC:       4:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Eii
+; ICALL-FUNC-NEXT:    br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT:    ret i32 [[TMP6]]
+; ICALL-FUNC:       7:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-FUNC-NEXT:    ret i32 [[CALL]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_tail_call(
+; ICALL-VTABLE-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[PTR]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV7Derived, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_targ:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = musttail call i32 @_ZN7Derived5func1Eii(ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP3]]
+; ICALL-VTABLE:       4:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV4Base, i64 0, i32 0, i64 2)
+; ICALL-VTABLE-NEXT:    br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_targ1:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = musttail call i32 @_ZN4Base5func1Eii(ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP6]]
+; ICALL-VTABLE:       7:
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr nonnull [[PTR]], i32 [[A]], i32 [[B]])
+; ICALL-VTABLE-NEXT:    ret i32 [[CALL]]
+;
+entry:
+  %vtable = load ptr, ptr %ptr, !prof !4  ; vtable pointer load; carries the kind-2 value-profile record !4
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %1 = load ptr, ptr %vtable  ; function pointer read at offset 0 from the loaded vtable address
+  %call = musttail call i32 %1(ptr %ptr, i32 %a, i32 %b), !prof !5  ; indirect call site; !5 is its kind-0 value-profile record
+  ret i32 %call
+}
+
+declare i1 @llvm.type.test(ptr, metadata)  ; intrinsic called on the loaded vtable pointer in @test_tail_call
+declare void @llvm.assume(i1)  ; intrinsic that consumes the llvm.type.test result
+define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) {  ; slot-2 entry of @_ZTV7Derived; %this is unused
+entry:
+  %sub = sub nsw i32 %a, %b  ; result is %a - %b
+  ret i32 %sub
+}
+
+define i32 @_ZN4Base5func1Eii(ptr %this, i32 %a, i32 %b) {  ; slot-2 entry of @_ZTV4Base; %this is unused
+entry:
+  %add = add nsw i32 %b, %a  ; result is %a + %b
+  ret i32 %add
+}
+
+
+!0 = !{i64 16, !"_ZTS4Base"}  ; !0-!3: (byte offset, type id) pairs used as !type operands on @_ZTV4Base (!0, !1) and @_ZTV7Derived (!0-!3)
+!1 = !{i64 16, !"_ZTSM4BaseFiiiE.virtual"}
+!2 = !{i64 16, !"_ZTS7Derived"}
+!3 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+!4 = !{!"VP", i32 2, i64 1600, i64 13870436605473471591, i64 900, i64 1960855528937986108, i64 700}  ; kind-2 record on the vtable load; the expected output spells the first hash as signed -4576307468236080025 (same 64-bit pattern)
+!5 = !{!"VP", i32 0, i64 1600, i64 7889036118036845314, i64 900, i64 10495086226207060333, i64 700}  ; kind-0 record on the indirect musttail call: two candidates, 900 + 700 of 1600
+
+; ICALL-FUNC: [[PROF4]] = !{!"VP", i32 2, i64 1600, i64 -4576307468236080025, i64 900, i64 1960855528937986108, i64 700}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 700, i32 0}
+
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 900, i32 700}
+; ICALL-VTABLE: [[PROF5]] = !{!"branch_weights", i32 700, i32 0}



More information about the llvm-commits mailing list