[clang] [IRPGO] [Draft] Import vtable definitions in ThinTLO and use more efficient vtable comparison sequence with cost-benefit analysis (PR #69141)

Mingming Liu via cfe-commits cfe-commits at lists.llvm.org
Sun Oct 15 21:29:08 PDT 2023


https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/69141

None

>From 67e78ec0ac7cf3395bade6f6c96cde0fbcd57711 Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Sat, 16 Sep 2023 23:15:15 -0700
Subject: [PATCH 1/6] [IRPGO][ValueProfile] Instrument virtual table address
 that could be used to do virtual table address comparison for
 indirect-call-promotion.

The changes include:
1) Insert value profile intrinsics and lower them.
   - Introduced INSTR_PROF_VTABLE_DATA to record per-vtable data.
   - Modified LLVM_PROF_RAW_HEADER to record the metadata for vtable profiles.
   - Test case in llvm/test/Transforms/PGOProfile/vtable_profile.ll
2) Tooling support in llvm-profdata to show the added vtable information
   - Changes are made in {raw,text,indexed} prof reader and/or writer to read/write vtable profile data.
   - Test cases added in llvm/test/tools/llvm-profdata
---
 .../CodeGen/coverage-profile-raw-version.c    |   4 +-
 compiler-rt/include/profile/InstrProfData.inc |  32 +++-
 compiler-rt/lib/profile/InstrProfiling.h      |  20 +-
 .../lib/profile/InstrProfilingBuffer.c        |  60 +++++-
 .../lib/profile/InstrProfilingInternal.h      |  11 +-
 compiler-rt/lib/profile/InstrProfilingMerge.c |  20 +-
 .../lib/profile/InstrProfilingPlatformLinux.c |  20 ++
 .../lib/profile/InstrProfilingWriter.c        |  45 ++++-
 .../Linux/instrprof-value-prof-warn.test      |   2 +-
 .../profile/instrprof-write-buffer-internal.c |  10 +-
 .../llvm/Analysis/IndirectCallVisitor.h       |  22 ++-
 llvm/include/llvm/ProfileData/InstrProf.h     | 125 ++++++++++++-
 .../llvm/ProfileData/InstrProfData.inc        |  31 +++-
 .../llvm/ProfileData/InstrProfReader.h        |  59 +++++-
 .../llvm/ProfileData/InstrProfWriter.h        |   4 +
 .../Instrumentation/InstrProfiling.h          |  12 ++
 llvm/lib/ProfileData/InstrProf.cpp            | 172 +++++++++++++++---
 llvm/lib/ProfileData/InstrProfReader.cpp      |  79 +++++++-
 llvm/lib/ProfileData/InstrProfWriter.cpp      |  92 +++++++++-
 .../Instrumentation/InstrProfiling.cpp        | 165 +++++++++++++++++
 .../Instrumentation/PGOInstrumentation.cpp    |   1 +
 .../Instrumentation/ValueProfilePlugins.inc   |  26 ++-
 .../Inputs/update_vtable_value_prof_inputs.sh |  84 +++++++++
 .../Transforms/PGOProfile/comdat_internal.ll  |   4 +-
 .../indirect_call_profile_funclet.ll          |   5 +-
 .../Transforms/PGOProfile/vtable_profile.ll   | 139 ++++++++++++++
 .../llvm-profdata/Inputs/c-general.profraw    | Bin 1800 -> 1912 bytes
 .../llvm-profdata/Inputs/vtable-prof.proftext |  73 ++++++++
 .../llvm-profdata/Inputs/vtable_prof.profraw  | Bin 0 -> 816 bytes
 .../llvm-profdata/binary-ids-padding.test     |  10 +-
 .../llvm-profdata/large-binary-id-size.test   |   4 +-
 ...alformed-not-space-for-another-header.test |   7 +-
 .../malformed-num-counters-zero.test          |   7 +-
 .../malformed-ptr-to-counter-array.test       |   7 +-
 .../misaligned-binary-ids-size.test           |   4 +-
 .../mismatched-raw-profile-header.test        |   2 +
 .../tools/llvm-profdata/raw-32-bits-be.test   |   7 +-
 .../tools/llvm-profdata/raw-32-bits-le.test   |   6 +-
 .../tools/llvm-profdata/raw-64-bits-be.test   |  13 +-
 .../tools/llvm-profdata/raw-64-bits-le.test   |  29 ++-
 .../tools/llvm-profdata/raw-two-profiles.test |  10 +-
 .../tools/llvm-profdata/vtable-prof.proftext  |  16 ++
 .../vtable-value-prof-basic.test              | 100 ++++++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  37 +++-
 44 files changed, 1474 insertions(+), 102 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
 create mode 100644 llvm/test/Transforms/PGOProfile/vtable_profile.ll
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-prof.proftext
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable_prof.profraw
 create mode 100644 llvm/test/tools/llvm-profdata/vtable-prof.proftext
 create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test

diff --git a/clang/test/CodeGen/coverage-profile-raw-version.c b/clang/test/CodeGen/coverage-profile-raw-version.c
index 749dce50298f025..bb30fd8c1c70ae7 100644
--- a/clang/test/CodeGen/coverage-profile-raw-version.c
+++ b/clang/test/CodeGen/coverage-profile-raw-version.c
@@ -1,8 +1,8 @@
 // RUN: %clang_cc1 -debug-info-kind=standalone -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -debug-info-kind=standalone -mllvm -debug-info-correlate -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -o - %s | FileCheck %s --check-prefix=DEBUG_INFO
 
-// CHECK: @__llvm_profile_raw_version = {{.*}}constant i64 8
-// DEBUG_INFO: @__llvm_profile_raw_version = {{.*}}constant i64 576460752303423496
+// CHECK: @__llvm_profile_raw_version = {{.*}}constant i64 9
+// DEBUG_INFO: @__llvm_profile_raw_version = {{.*}}constant i64 576460752303423497
 
 int main() {
     return 0;
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 4456bf1ab176325..b49f9d983e0f46d 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -92,6 +92,19 @@ INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
 /* INSTR_PROF_DATA end. */
 
 
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash, \
+                      ConstantInt::get(llvm::Type::getInt64Ty(Ctx), IndexedInstrProf::ComputeHash(VTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize, \
+                      ConstantInt::get(llvm::Type::getInt32Ty(Ctx), VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
+
 /* This is an internal data structure used by value profiler. It
  * is defined here to allow serialization code sharing by LLVM
  * to be used in unit test.
@@ -136,6 +149,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
                       (uintptr_t)CountersBegin - (uintptr_t)DataBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 #undef INSTR_PROF_RAW_HEADER
 /* INSTR_PROF_RAW_HEADER  end */
@@ -177,13 +192,14 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
 VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
 VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -270,12 +286,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
 INSTR_PROF_SECT_ENTRY(IPSK_name, \
                       INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
                       INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+                      INSTR_PROF_VNAME_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vals, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
                       INSTR_PROF_VALS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
                       INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+                      INSTR_PROF_VTAB_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
                       INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
                       INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -646,9 +668,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 
 /* FIXME: Please remedy the fixme in the header before bumping the version. */
 /* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 8
+#define INSTR_PROF_RAW_VERSION 9
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 10
+#define INSTR_PROF_INDEX_VERSION 11
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 5
 
@@ -686,9 +708,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
 #define INSTR_PROF_COVMAP_COMMON __llvm_covmap
 #define INSTR_PROF_COVFUN_COMMON __llvm_covfun
 #define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile
@@ -697,9 +721,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define INSTR_PROF_DATA_COFF ".lprfd$M"
 #define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfn$M"
 #define INSTR_PROF_CNTS_COFF ".lprfc$M"
 #define INSTR_PROF_VALS_COFF ".lprfv$M"
 #define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
 #define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
 #define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
 #define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M"
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 4433d7bd48871fc..f3afa694e02c27d 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -38,6 +38,12 @@ typedef struct ValueProfNode {
 #include "profile/InstrProfData.inc"
 } ValueProfNode;
 
+typedef void *IntPtrT;
+typedef struct VTableProfData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} VTableProfData;
+
 /*!
  * \brief Return 1 if profile counters are continuously synced to the raw
  * profile via an mmap(). This is in contrast to the default mode, in which
@@ -86,10 +92,14 @@ const __llvm_profile_data *__llvm_profile_begin_data(void);
 const __llvm_profile_data *__llvm_profile_end_data(void);
 const char *__llvm_profile_begin_names(void);
 const char *__llvm_profile_end_names(void);
+const char *__llvm_profile_begin_vnames(void);
+const char *__llvm_profile_end_vnames(void);
 char *__llvm_profile_begin_counters(void);
 char *__llvm_profile_end_counters(void);
 ValueProfNode *__llvm_profile_begin_vnodes();
 ValueProfNode *__llvm_profile_end_vnodes();
+VTableProfData *__llvm_profile_begin_vtables();
+VTableProfData *__llvm_profile_end_vtables();
 uint32_t *__llvm_profile_begin_orderfile();
 
 /*!
@@ -276,6 +286,12 @@ uint64_t __llvm_profile_get_num_counters(const char *Begin, const char *End);
 /*! \brief Get the size of the profile counters section in bytes. */
 uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End);
 
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+                                       const VTableProfData *End);
+
+uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
+                                        const VTableProfData *End);
+
 /* ! \brief Given the sizes of the data and counter information, return the
  * number of padding bytes before and after the counters, and after the names,
  * in the raw profile.
@@ -287,8 +303,10 @@ uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End);
  */
 void __llvm_profile_get_padding_sizes_for_counters(
     uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize,
+    uint64_t VTableSize, uint64_t VNameSize,
     uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
-    uint64_t *PaddingBytesAfterNames);
+    uint64_t *PaddingBytesAfterNames, uint64_t *PaddingBytesAfterVTable,
+    uint64_t *PaddingBytesAfterVNames);
 
 /*!
  * \brief Set the flag that profile data has been dumped to the file.
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 61ac5d9c0285002..0c36e40444c7344 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -9,6 +9,8 @@
 // Note: This is linked into the Darwin kernel, and must remain compatible
 // with freestanding compilation. See `darwin_add_builtin_libraries`.
 
+#include <assert.h>
+
 #include "InstrProfiling.h"
 #include "InstrProfilingInternal.h"
 #include "InstrProfilingPort.h"
@@ -45,9 +47,14 @@ uint64_t __llvm_profile_get_size_for_buffer(void) {
   const char *CountersEnd = __llvm_profile_end_counters();
   const char *NamesBegin = __llvm_profile_begin_names();
   const char *NamesEnd = __llvm_profile_end_names();
+  const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
+  const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
+  const char *VNamesBegin = __llvm_profile_begin_vnames();
+  const char *VNamesEnd = __llvm_profile_end_vnames();
 
   return __llvm_profile_get_size_for_buffer_internal(
-      DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd);
+      DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd,
+      VTableBegin, VTableEnd, VNamesBegin, VNamesEnd);
 }
 
 COMPILER_RT_VISIBILITY
@@ -63,6 +70,18 @@ uint64_t __llvm_profile_get_data_size(const __llvm_profile_data *Begin,
                                       const __llvm_profile_data *End) {
   return __llvm_profile_get_num_data(Begin, End) * sizeof(__llvm_profile_data);
 }
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
+                                       const VTableProfData *End) {
+  intptr_t EndI = (intptr_t)End, BeginI = (intptr_t)Begin;
+  return (EndI + sizeof(VTableProfData) - 1 - BeginI) / sizeof(VTableProfData);
+}
+
+COMPILER_RT_VISIBILITY
+uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
+                                        const VTableProfData *End) {
+  return __llvm_profile_get_num_vtable(Begin, End) * sizeof(VTableProfData);
+}
 
 COMPILER_RT_VISIBILITY size_t __llvm_profile_counter_entry_size(void) {
   if (__llvm_profile_get_version() & VARIANT_MASK_BYTE_COVERAGE)
@@ -103,46 +122,68 @@ static int needsCounterPadding(void) {
 COMPILER_RT_VISIBILITY
 void __llvm_profile_get_padding_sizes_for_counters(
     uint64_t DataSize, uint64_t CountersSize, uint64_t NamesSize,
+    uint64_t VTableSize, uint64_t VNameSize,
     uint64_t *PaddingBytesBeforeCounters, uint64_t *PaddingBytesAfterCounters,
-    uint64_t *PaddingBytesAfterNames) {
+    uint64_t *PaddingBytesAfterNames, uint64_t *PaddingBytesAfterVTable,
+    uint64_t *PaddingBytesAfterVName) {
+  // Counter padding is needed only if continuous mode is enabled.
   if (!needsCounterPadding()) {
     *PaddingBytesBeforeCounters = 0;
     *PaddingBytesAfterCounters =
         __llvm_profile_get_num_padding_bytes(CountersSize);
     *PaddingBytesAfterNames = __llvm_profile_get_num_padding_bytes(NamesSize);
+    *PaddingBytesAfterVTable = __llvm_profile_get_num_padding_bytes(VTableSize);
+    *PaddingBytesAfterVName = __llvm_profile_get_num_padding_bytes(VNameSize);
     return;
   }
 
+  // Value profiling not supported in continuous mode at profile-write time
+  // according to
+  // https://github.com/llvm/llvm-project/blob/e6a007f6b51a661ed3dd8b0210b734b3e9b4354f/compiler-rt/lib/profile/InstrProfilingWriter.c#L328
+  assert(VTableSize == 0 && VNameSize == 0 &&
+         "Value profile not supported for continuous mode");
   // In continuous mode, the file offsets for headers and for the start of
   // counter sections need to be page-aligned.
   *PaddingBytesBeforeCounters =
       calculateBytesNeededToPageAlign(sizeof(__llvm_profile_header) + DataSize);
   *PaddingBytesAfterCounters = calculateBytesNeededToPageAlign(CountersSize);
   *PaddingBytesAfterNames = calculateBytesNeededToPageAlign(NamesSize);
+  // Set these two variables to zero to avoid uninitialized variables
+  // even if VTableSize and VNameSize are asserted to be zero.
+  *PaddingBytesAfterVTable = 0;
+  *PaddingBytesAfterVName = 0;
 }
 
 COMPILER_RT_VISIBILITY
 uint64_t __llvm_profile_get_size_for_buffer_internal(
     const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd,
     const char *CountersBegin, const char *CountersEnd, const char *NamesBegin,
-    const char *NamesEnd) {
+    const char *NamesEnd, const VTableProfData *VTableBegin,
+    const VTableProfData *VTableEnd, const char *VNamesBegin,
+    const char *VNamesEnd) {
   /* Match logic in __llvm_profile_write_buffer(). */
   const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
   uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
   uint64_t CountersSize =
       __llvm_profile_get_counters_size(CountersBegin, CountersEnd);
+  uint64_t VTableSize = __llvm_profile_get_vtable_size(VTableBegin, VTableEnd);
+  uint64_t VNameSize = (VNamesEnd - VNamesBegin) * sizeof(char);
 
   /* Determine how much padding is needed before/after the counters and after
    * the names. */
   uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
-      PaddingBytesAfterNames;
+      PaddingBytesAfterNames, PaddingBytesAfterVTable, PaddingBytesAfterVNames;
   __llvm_profile_get_padding_sizes_for_counters(
-      DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
-      &PaddingBytesAfterCounters, &PaddingBytesAfterNames);
+      DataSize, CountersSize, NamesSize, VTableSize, VNameSize,
+      &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
+      &PaddingBytesAfterNames, &PaddingBytesAfterVTable,
+      &PaddingBytesAfterVNames);
 
   return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
          DataSize + PaddingBytesBeforeCounters + CountersSize +
-         PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames;
+         PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames +
+         VTableSize + PaddingBytesAfterVTable + VNameSize +
+         PaddingBytesAfterVNames;
 }
 
 COMPILER_RT_VISIBILITY
@@ -163,6 +204,9 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
     const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
   ProfDataWriter BufferWriter;
   initBufferWriter(&BufferWriter, Buffer);
+  // Set virtual table arguments to NULL since they are not supported yet.
   return lprofWriteDataImpl(&BufferWriter, DataBegin, DataEnd, CountersBegin,
-                            CountersEnd, 0, NamesBegin, NamesEnd, 0);
+                            CountersEnd, 0, NamesBegin, NamesEnd,
+                            NULL /* VTableBegin */, NULL /* VTableEnd */,
+                            NULL /* VNamesBegin */, NULL /* VNamesEnd */, 0);
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index 360165e32ab3fe2..bce333f933f0ffe 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -18,11 +18,16 @@
  * pointers to the live data in memory.  This function is probably not what you
  * want.  Use __llvm_profile_get_size_for_buffer instead.  Use this function if
  * your program has a custom memory layout.
+ * NOTE: Changing this function's signature requires updating C source code
+ * that calls it, as the existing tests demonstrate. If this causes backward
+ * compatibility issues, consider adding another function for new use cases.
  */
 uint64_t __llvm_profile_get_size_for_buffer_internal(
     const __llvm_profile_data *DataBegin, const __llvm_profile_data *DataEnd,
     const char *CountersBegin, const char *CountersEnd, const char *NamesBegin,
-    const char *NamesEnd);
+    const char *NamesEnd, const VTableProfData *VTableBegin,
+    const VTableProfData *VTableEnd, const char *VNamesBegin,
+    const char *VNamesEnd);
 
 /*!
  * \brief Write instrumentation data to the given buffer, given explicit
@@ -154,7 +159,9 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
                        const __llvm_profile_data *DataEnd,
                        const char *CountersBegin, const char *CountersEnd,
                        VPDataReaderType *VPDataReader, const char *NamesBegin,
-                       const char *NamesEnd, int SkipNameDataWrite);
+                       const char *NamesEnd, const VTableProfData *VTableBegin,
+                       const VTableProfData *VTableEnd, const char *VNamesBegin,
+                       const char *VNamesEnd, int SkipNameDataWrite);
 
 /* Merge value profile data pointed to by SrcValueProfData into
  * in-memory profile counters pointed by to DstData.  */
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 9cf12f251f7262d..2ef6227599ff139 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -124,9 +124,27 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
   SrcCountersEnd = SrcCountersStart +
                    Header->NumCounters * __llvm_profile_counter_entry_size();
   SrcNameStart = SrcCountersEnd;
-  SrcValueProfDataStart =
+  // This is to assume counter size is a multiple of 8 bytes.
+  // uint64_t NamesSize = Header->NamesSize;
+  // uint64_t PaddingBytesAfterNames =
+  //    __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+  // First, skip rather than merge them
+  uint64_t VTableSectionSize = Header->NumVTables * sizeof(VTableProfData);
+  uint64_t PaddingBytesAfterVTableSection =
+      __llvm_profile_get_num_padding_bytes(VTableSectionSize);
+  uint64_t VNamesSize = Header->VNamesSize;
+  uint64_t PaddingBytesAfterVNamesSize =
+      __llvm_profile_get_num_padding_bytes(VNamesSize);
+
+  uint64_t VTableProfDataOffset =
       SrcNameStart + Header->NamesSize +
       __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+
+  uint64_t VTableNamesOffset =
+      VTableProfDataOffset + VTableSectionSize + PaddingBytesAfterVTableSection;
+
+  SrcValueProfDataStart =
+      VTableNamesOffset + VNamesSize + PaddingBytesAfterVNamesSize;
   if (SrcNameStart < SrcCountersStart)
     return 1;
 
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 2cce0a4b2c48d35..dc861632271ce79 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -33,8 +33,12 @@
 #define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
 #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
 #define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
+#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON)
+#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON)
 #define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
 #define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
+#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON)
+#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON)
 #define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
 #define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON)
 #define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON)
@@ -48,6 +52,10 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
     COMPILER_RT_WEAK;
 extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
+extern char PROF_VNAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
 extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
@@ -68,6 +76,18 @@ COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
 COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
   return &PROF_NAME_STOP;
 }
+COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vnames(void) {
+  return &PROF_VNAME_START;
+}
+COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vnames(void) {
+  return &PROF_VNAME_STOP;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_begin_vtables(void) {
+  return &PROF_VTABLE_START;
+}
+COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_end_vtables(void) {
+  return &PROF_VTABLE_STOP;
+}
 COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) {
   return &PROF_CNTS_START;
 }
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 1e22398a4c0f64a..ed5dbdb6ee4383f 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -248,8 +248,13 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
   const char *CountersEnd = __llvm_profile_end_counters();
   const char *NamesBegin = __llvm_profile_begin_names();
   const char *NamesEnd = __llvm_profile_end_names();
+  const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
+  const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
+  const char *VNamesBegin = __llvm_profile_begin_vnames();
+  const char *VNamesEnd = __llvm_profile_end_vnames();
   return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
                             CountersEnd, VPDataReader, NamesBegin, NamesEnd,
+                            VTableBegin, VTableEnd, VNamesBegin, VNamesEnd,
                             SkipNameDataWrite);
 }
 
@@ -258,7 +263,9 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
                    const __llvm_profile_data *DataEnd,
                    const char *CountersBegin, const char *CountersEnd,
                    VPDataReaderType *VPDataReader, const char *NamesBegin,
-                   const char *NamesEnd, int SkipNameDataWrite) {
+                   const char *NamesEnd, const VTableProfData *VTableBegin,
+                   const VTableProfData *VTableEnd, const char *VNamesBegin,
+                   const char *VNamesEnd, int SkipNameDataWrite) {
   int DebugInfoCorrelate =
       (__llvm_profile_get_version() & VARIANT_MASK_DBG_CORRELATE) != 0ULL;
 
@@ -272,6 +279,13 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   const uint64_t NumCounters =
       __llvm_profile_get_num_counters(CountersBegin, CountersEnd);
   const uint64_t NamesSize = DebugInfoCorrelate ? 0 : NamesEnd - NamesBegin;
+  const uint64_t NumVTables =
+      __llvm_profile_get_num_vtable(VTableBegin, VTableEnd);
+  const uint64_t VTableSectionSize =
+      __llvm_profile_get_vtable_size(VTableBegin, VTableEnd);
+  // Note, in reality, vtable profiling is not supported when DebugInfoCorrelate
+  // is true.
+  const uint64_t VNamesSize = DebugInfoCorrelate ? 0 : VNamesEnd - VNamesBegin;
 
   /* Create the header. */
   __llvm_profile_header Header;
@@ -279,11 +293,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   /* Determine how much padding is needed before/after the counters and after
    * the names. */
   uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
-      PaddingBytesAfterNames;
+      PaddingBytesAfterNames, PaddingBytesAfterVTable, PaddingBytesAfterVNames;
   __llvm_profile_get_padding_sizes_for_counters(
-      DataSectionSize, CountersSectionSize, NamesSize,
-      &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
-      &PaddingBytesAfterNames);
+      DataSectionSize, CountersSectionSize, NamesSize, VTableSectionSize,
+      VNamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
+      &PaddingBytesAfterNames, &PaddingBytesAfterVTable,
+      &PaddingBytesAfterVNames);
 
   {
 /* Initialize header structure.  */
@@ -305,13 +320,24 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
 
   /* Write the profile header. */
   ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1, 0}};
+  // printf("Size of profile header is %d\n",
+  // (int)(sizeof(__llvm_profile_header)));
   if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))
     return -1;
 
+  // printf("Completed profile header\n");
+
   /* Write the binary id lengths and data. */
-  if (__llvm_write_binary_ids(Writer) == -1)
+  int binary_id_size = __llvm_write_binary_ids(Writer);
+  if (binary_id_size == -1)
     return -1;
 
+  // Might be needed for debugging. Clean up before commit.
+  // uint64_t VTableProfDataOffset =
+  //    sizeof(__llvm_profile_header) + binary_id_size + DataSectionSize +
+  //    PaddingBytesBeforeCounters + CountersSectionSize +
+  //    PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames;
+
   /* Write the profile data. */
   ProfDataIOVec IOVecData[] = {
       {DebugInfoCorrelate ? NULL : DataBegin, sizeof(uint8_t), DataSectionSize,
@@ -321,7 +347,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
       {NULL, sizeof(uint8_t), PaddingBytesAfterCounters, 1},
       {(SkipNameDataWrite || DebugInfoCorrelate) ? NULL : NamesBegin,
        sizeof(uint8_t), NamesSize, 0},
-      {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1}};
+      {NULL, sizeof(uint8_t), PaddingBytesAfterNames, 1},
+      {VTableBegin, sizeof(uint8_t), VTableSectionSize, 0},
+      {NULL, sizeof(uint8_t), PaddingBytesAfterVTable, 1},
+      {(SkipNameDataWrite || DebugInfoCorrelate) ? NULL : VNamesBegin,
+       sizeof(uint8_t), VNamesSize, 0},
+      {NULL, sizeof(uint8_t), PaddingBytesAfterVNames, 1}};
   if (Writer->Write(Writer, IOVecData, sizeof(IOVecData) / sizeof(*IOVecData)))
     return -1;
 
diff --git a/compiler-rt/test/profile/Linux/instrprof-value-prof-warn.test b/compiler-rt/test/profile/Linux/instrprof-value-prof-warn.test
index 991fd2b69b5b303..89787e79c20e9d1 100644
--- a/compiler-rt/test/profile/Linux/instrprof-value-prof-warn.test
+++ b/compiler-rt/test/profile/Linux/instrprof-value-prof-warn.test
@@ -1,7 +1,7 @@
 RUN: %clang_pgogen -O2 -fuse-ld=bfd -mllvm -disable-vp=false -mllvm -vp-static-alloc=true -DSTRESS=1 -o %t.ir.warn  %S/../Inputs/instrprof-value-prof-real.c
 RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=255  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=WARNING %s
 #  Test that enough static counters have been allocated
-RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=130  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=NOWARNING --allow-empty %s
+RUN: env LLVM_PROFILE_FILE=%t.ir.profraw LLVM_VP_MAX_NUM_VALS_PER_SITE=80  %run %t.ir.warn 2>&1 |FileCheck --check-prefix=NOWARNING --allow-empty %s
 
 # WARNING: LLVM Profile Warning:
 # NOWARNING-NOT: LLVM Profile Warning:
diff --git a/compiler-rt/test/profile/instrprof-write-buffer-internal.c b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
index 7b96c6d91c33f5a..97dfff8d7fe71fb 100644
--- a/compiler-rt/test/profile/instrprof-write-buffer-internal.c
+++ b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
@@ -28,7 +28,9 @@ char *__llvm_profile_end_counters(void);
 
 uint64_t __llvm_profile_get_size_for_buffer_internal(
     const void *DataBegin, const void *DataEnd, const char *CountersBegin,
-    const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
+    const char *CountersEnd, const char *NamesBegin, const char *NamesEnd,
+    const char *VTableBegin, const char *VTableEnd, const char *VNamesBegin,
+    const char *VNamesEnd);
 
 int __llvm_profile_write_buffer_internal(char *Buffer, const void *DataBegin,
                                          const void *DataEnd,
@@ -43,7 +45,11 @@ int main(int argc, const char *argv[]) {
   uint64_t bufsize = __llvm_profile_get_size_for_buffer_internal(
       __llvm_profile_begin_data(), __llvm_profile_end_data(),
       __llvm_profile_begin_counters(), __llvm_profile_end_counters(),
-      __llvm_profile_begin_names(), __llvm_profile_end_names());
+      __llvm_profile_begin_names(), __llvm_profile_end_names(), NULL, NULL,
+      NULL, NULL);
+
+  // printf("buffer size is %lld\n", bufsize);
+  //uint64_t aligned_bufsize = ((bufsize + 32) >> 6) << 6;
 
   char *buf = malloc(bufsize);
   int ret = __llvm_profile_write_buffer_internal(buf,
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 0825e19ecd2d240..52d4ff04d3d4ecc 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -19,20 +19,45 @@ namespace llvm {
 // Visitor class that finds all indirect call.
 struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
   std::vector<CallBase *> IndirectCalls;
+  // For each indirect call whose callee is loaded from memory: the instruction
+  // (if any) that produces the load address once in-bounds constant offsets
+  // are stripped.
+  std::vector<Instruction *> VTableAddrs;
   PGOIndirectCallVisitor() = default;
 
   void visitCallBase(CallBase &Call) {
-    if (Call.isIndirectCall())
-      IndirectCalls.push_back(&Call);
+    if (!Call.isIndirectCall())
+      return;
+    IndirectCalls.push_back(&Call);
+
+    // Only a callee that is itself loaded from memory can yield an address.
+    auto *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
+    if (!LI)
+      return;
+
+    // stripInBoundsConstantOffsets() never returns null, so a dyn_cast is
+    // enough to keep only addresses produced by an instruction.
+    Value *VTablePtr = LI->getPointerOperand()->stripInBoundsConstantOffsets();
+    if (auto *VTableInst = dyn_cast<Instruction>(VTablePtr))
+      VTableAddrs.push_back(VTableInst);
   }
 };
 
 // Helper function that finds all indirect call sites.
 inline std::vector<CallBase *> findIndirectCalls(Function &F) {
   PGOIndirectCallVisitor ICV;
   ICV.visit(F);
   return ICV.IndirectCalls;
 }
+
+// Helper function that finds the instructions PGOIndirectCallVisitor records
+// in VTableAddrs for the indirect calls of \p F.
+inline std::vector<Instruction *> findVTableAddrs(Function &F) {
+  PGOIndirectCallVisitor ICV;
+  ICV.visit(F);
+  return ICV.VTableAddrs;
+}
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index f9096b46157200b..e5af3767fb746a4 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -90,6 +90,9 @@ inline StringRef getInstrProfValueProfMemOpFuncName() {
 /// Return the name prefix of variables containing instrumented function names.
 inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
 
+/// Return the name prefix of variables containing virtual table profile data.
+inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; }
+
 /// Return the name prefix of variables containing per-function control data.
 inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
 
@@ -108,6 +111,8 @@ inline StringRef getInstrProfNamesVarName() {
   return "__llvm_prf_nm";
 }
 
+inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; }
+
 /// Return the name of a covarage mapping variable (internal linkage)
 /// for each instrumented source module. Such variables are allocated
 /// in the __llvm_covmap section.
@@ -237,11 +242,16 @@ Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
 Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                 std::string &Result, bool doCompression = true);
 
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+                           std::string &Result, bool doCompression);
+
 /// \c NameStrings is a string composed of one of more sub-strings encoded in
 /// the format described above. The substrings are separated by 0 or more zero
 /// bytes. This method decodes the string and populates the \c Symtab.
 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
 
+Error readVTableNames(StringRef NameStrings, InstrProfSymtab &Symtab);
+
 /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being
 /// set in IR PGO compilation.
 bool isIRPGOFlagSet(const Module *M);
@@ -291,7 +301,7 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
 
 /// Check if we can use Comdat for profile variables. This will eliminate
 /// the duplicated profile variables for Comdat functions.
-bool needsComdatForCounter(const Function &F, const Module &M);
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M);
 
 /// An enum describing the attributes of an instrumented profile.
 enum class InstrProfKind {
@@ -429,14 +439,24 @@ class InstrProfSymtab {
   uint64_t Address = 0;
   // Unique name strings.
   StringSet<> NameTab;
+  // Unique virtual table names.
+  StringSet<> VTableNames;
   // A map from MD5 keys to function name strings.
   std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
+  // A map from MD5 keys to virtual table definitions. Only populated when
+  // building the Symtab from a module.
+  std::vector<std::pair<uint64_t, GlobalVariable *>> MD5VTableMap;
   // A map from MD5 keys to function define. We only populate this map
   // when build the Symtab from a Module.
   std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
   // A map from function runtime address to function name MD5 hash.
   // This map is only populated and used by raw instr profile reader.
   AddrHashMap AddrToMD5Map;
+  // A map from virtual table runtime address to vtable name MD5 hash.
+  // This map is only populated and used by raw instr profile reader.
+  // This is a different map from 'AddrToMD5Map' for readability and
+  // debuggability.
+  AddrHashMap VTableAddrToMD5Map;
   bool Sorted = false;
 
   static StringRef getExternalSymbol() {
@@ -471,6 +491,8 @@ class InstrProfSymtab {
   /// This method is a wrapper to \c readPGOFuncNameStrings method.
   inline Error create(StringRef NameStrings);
 
+  inline Error create(StringRef FuncNameStrings, StringRef VTableNameStrings);
+
   /// A wrapper interface to populate the PGO symtab with functions
   /// decls from module \c M. This interface is used by transformation
   /// passes such as indirect function call promotion. Variable \c InLTO
@@ -481,6 +503,13 @@ class InstrProfSymtab {
   /// \p IterRange. This interface is used by IndexedProfReader.
   template <typename NameIterRange> Error create(const NameIterRange &IterRange);
 
+  /// Create InstrProfSymtab from a set of function names iterable from
+  /// \p FuncIterRange and a set of vtable names iterable from
+  /// \p VTableIterRange. This interface is used by IndexedProfReader.
+  template <typename FuncNameIterRange, typename VTableNameIterRange>
+  Error create(const FuncNameIterRange &FuncIterRange,
+               const VTableNameIterRange &VTableIterRange);
+
   /// Update the symtab by adding \p FuncName to the table. This interface
   /// is used by the raw and text profile readers.
   Error addFuncName(StringRef FuncName) {
@@ -496,15 +525,49 @@ class InstrProfSymtab {
     return Error::success();
   }
 
+  Error addVTableName(StringRef VTableName) {
+    if (VTableName.empty())
+      return make_error<InstrProfError>(instrprof_error::malformed,
+                                        "invalid input: VTableName is empty");
+    // Insert into NameTab.
+    auto Ins = NameTab.insert(VTableName);
+
+    // Insert into VTableNames.
+    VTableNames.insert(VTableName);
+
+    // If this is newly added, update MD5NameMap.
+    if (Ins.second) {
+      // Pair the hash with the key held by NameTab (Ins.first->getKey()), not
+      // with the VTableName argument, and mark the map as unsorted because
+      // the new entry is appended at the end.
+      MD5NameMap.push_back(std::make_pair(
+          IndexedInstrProf::ComputeHash(VTableName), Ins.first->getKey()));
+      Sorted = false;
+    }
+    return Error::success();
+  }
+
+  const StringSet<> &getVTableNames() const { return VTableNames; }
+
   /// Map a function address to its name's MD5 hash. This interface
   /// is only used by the raw profiler reader.
   void mapAddress(uint64_t Addr, uint64_t MD5Val) {
     AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
   }
 
+  // Map the start and end address of a vtable variable to its name's MD5 hash.
+  // This interface is only used by the raw profile reader.
+  void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) {
+    VTableAddrToMD5Map.push_back(std::make_pair(StartAddr, MD5Val));
+    VTableAddrToMD5Map.push_back(std::make_pair(EndAddr, MD5Val));
+  }
+
   /// Return a function's hash, or 0, if the function isn't in this SymTab.
   uint64_t getFunctionHashFromAddress(uint64_t Address);
 
+  /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab.
+  uint64_t getVTableHashFromAddress(uint64_t Address);
+
   /// Return function's PGO name from the function name's symbol
   /// address in the object file. If an error occurs, return
   /// an empty string.
@@ -519,6 +582,11 @@ class InstrProfSymtab {
   /// will be represented using the same StringRef value.
   inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
 
+  /// Just like getFuncName, except that it will return a non-empty StringRef
+  /// if the function is external to this symbol table. All such cases
+  /// will be represented using the same StringRef value.
+  // inline StringRef getVTableNameOrExternalSymbol(uint64_t VTableMD5Hash);
+
   /// True if Symbol is the value used to represent external symbols.
   static bool isExternalSymbol(const StringRef &Symbol) {
     return Symbol == InstrProfSymtab::getExternalSymbol();
@@ -526,6 +594,8 @@ class InstrProfSymtab {
 
   /// Return function from the name's md5 hash. Return nullptr if not found.
   inline Function *getFunction(uint64_t FuncMD5Hash);
+  /// Return vtable from the name's MD5 hash. Return nullptr if not found.
+  inline GlobalVariable *getGlobalVariable(uint64_t GlobalVariableMD5Hash);
 
   /// Return the name section data.
   inline StringRef getNameData() const { return Data; }
@@ -544,6 +614,16 @@ Error InstrProfSymtab::create(StringRef NameStrings) {
   return readPGOFuncNameStrings(NameStrings, *this);
 }
 
+Error InstrProfSymtab::create(StringRef FuncNameStrings,
+                              StringRef VTableNameStrings) {
+  if (Error E = readPGOFuncNameStrings(FuncNameStrings, *this))
+    return E;
+
+  // FIXME: Add test coverage that this returns success when VTableNameStrings
+  // is empty.
+  return readVTableNames(VTableNameStrings, *this);
+}
+
 template <typename NameIterRange>
 Error InstrProfSymtab::create(const NameIterRange &IterRange) {
   for (auto Name : IterRange)
@@ -554,6 +634,23 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) {
   return Error::success();
 }
 
+template <typename FuncNameIterRange, typename VTableNameIterRange>
+Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange,
+                              const VTableNameIterRange &VTableIterRange) {
+  for (auto Name : FuncIterRange)
+    if (Error E = addFuncName(Name))
+      return E;
+
+  for (auto VTableName : VTableIterRange) {
+    if (Error E = addVTableName(VTableName)) {
+      return E;
+    }
+  }
+
+  finalizeSymtab();
+  return Error::success();
+}
+
 void InstrProfSymtab::finalizeSymtab() {
   if (Sorted)
     return;
@@ -592,6 +689,19 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
   return nullptr;
 }
 
+GlobalVariable *
+InstrProfSymtab::getGlobalVariable(uint64_t GlobalVariableMD5Hash) {
+  finalizeSymtab();
+  auto Result =
+      llvm::lower_bound(MD5VTableMap, GlobalVariableMD5Hash,
+                        [](const std::pair<uint64_t, GlobalVariable *> &LHS,
+                           uint64_t RHS) { return LHS.first < RHS; });
+
+  if (Result != MD5VTableMap.end() && Result->first == GlobalVariableMD5Hash)
+    return Result->second;
+  return nullptr;
+}
+
 // To store the sums of profile count values, or the percentage of
 // the sums of the total count values.
 struct CountSumOrPercent {
@@ -813,6 +923,7 @@ struct InstrProfRecord {
   struct ValueProfData {
     std::vector<InstrProfValueSiteRecord> IndirectCallSites;
     std::vector<InstrProfValueSiteRecord> MemOPSizes;
+    std::vector<InstrProfValueSiteRecord> VTableTargets;
   };
   std::unique_ptr<ValueProfData> ValueData;
 
@@ -835,6 +946,8 @@ struct InstrProfRecord {
       return ValueData->IndirectCallSites;
     case IPVK_MemOPSize:
       return ValueData->MemOPSizes;
+    case IPVK_VTableTarget:
+      return ValueData->VTableTargets;
     default:
       llvm_unreachable("Unknown value kind!");
     }
@@ -849,6 +962,8 @@ struct InstrProfRecord {
       return ValueData->IndirectCallSites;
     case IPVK_MemOPSize:
       return ValueData->MemOPSizes;
+    case IPVK_VTableTarget:
+      return ValueData->VTableTargets;
     default:
       llvm_unreachable("Unknown value kind!");
     }
@@ -1015,6 +1130,8 @@ enum ProfVersion {
   Version9 = 9,
   // An additional (optional) temporal profile traces section is added.
   Version10 = 10,
+  // VTable profiling is added.
+  Version11 = 11,
-  // The current version is 10.
+  // The current version is 11.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
@@ -1035,6 +1152,7 @@ struct Header {
   uint64_t MemProfOffset;
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
+  uint64_t VTableNamesOffset; // Organize virtual table names.
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
@@ -1174,6 +1292,11 @@ template <class IntPtrT> struct alignas(8) ProfileData {
   #include "llvm/ProfileData/InstrProfData.inc"
 };
 
+template <class IntPtrT> struct alignas(8) VTableProfileData {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
 // File header structure of the LLVM profile data in raw format.
 // The definition should match the header referenced in
 // compiler-rt/lib/profile/InstrProfilingFile.c  and
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 4456bf1ab176325..af2532ffd4844f4 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -91,6 +91,18 @@ INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
 #undef INSTR_PROF_DATA
 /* INSTR_PROF_DATA end. */
 
+#ifndef INSTR_PROF_VTABLE_DATA
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
+#else
+#define INSTR_PROF_VTABLE_DATA_DEFINED
+#endif
+INSTR_PROF_VTABLE_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), VTableNameHash, \
+                      ConstantInt::get(llvm::Type::getInt64Ty(Ctx), IndexedInstrProf::ComputeHash(VTableName)))
+INSTR_PROF_VTABLE_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), VTablePointer, VTableAddr)
+INSTR_PROF_VTABLE_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), VTableSize, \
+                      ConstantInt::get(llvm::Type::getInt32Ty(Ctx), VTableSizeVal))
+#undef INSTR_PROF_VTABLE_DATA
+/* INSTR_PROF_VTABLE_DATA end. */
 
 /* This is an internal data structure used by value profiler. It
  * is defined here to allow serialization code sharing by LLVM
@@ -136,6 +148,8 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
                       (uintptr_t)CountersBegin - (uintptr_t)DataBegin)
 INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 #undef INSTR_PROF_RAW_HEADER
 /* INSTR_PROF_RAW_HEADER  end */
@@ -177,13 +191,14 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
 VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
  * indexed with the kind value.
  */
 VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
+VALUE_PROF_KIND(IPVK_Last, IPVK_VTableTarget, "last")
 
 #undef VALUE_PROF_KIND
 /* VALUE_PROF_KIND end */
@@ -270,12 +285,18 @@ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
 INSTR_PROF_SECT_ENTRY(IPSK_name, \
                       INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
                       INSTR_PROF_NAME_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vname, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VNAME_COMMON), \
+                      INSTR_PROF_VNAME_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vals, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
                       INSTR_PROF_VALS_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
                       INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
                       INSTR_PROF_VNODES_COFF, "__DATA,")
+INSTR_PROF_SECT_ENTRY(IPSK_vtab, \
+                      INSTR_PROF_QUOTE(INSTR_PROF_VTAB_COMMON), \
+                      INSTR_PROF_VTAB_COFF, "__DATA,")
 INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
                       INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
                       INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
@@ -646,9 +667,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 
 /* FIXME: Please remedy the fixme in the header before bumping the version. */
 /* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 8
+#define INSTR_PROF_RAW_VERSION 9
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 10
+#define INSTR_PROF_INDEX_VERSION 11
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 5
 
@@ -686,9 +707,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
+#define INSTR_PROF_VTAB_COMMON __llvm_prf_vtab
 #define INSTR_PROF_COVMAP_COMMON __llvm_covmap
 #define INSTR_PROF_COVFUN_COMMON __llvm_covfun
 #define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile
@@ -697,9 +720,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define INSTR_PROF_DATA_COFF ".lprfd$M"
 #define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_VNAME_COFF ".lprfvn$M"
 #define INSTR_PROF_CNTS_COFF ".lprfc$M"
 #define INSTR_PROF_VALS_COFF ".lprfv$M"
 #define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_VTAB_COFF ".lprfvt$M"
 #define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
 #define INSTR_PROF_COVFUN_COFF ".lcovfun$M"
 #define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M"
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 74e921e10c47b9a..bc5abf27385fc7e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -326,10 +326,14 @@ class RawInstrProfReader : public InstrProfReader {
   uint64_t NamesDelta;
   const RawInstrProf::ProfileData<IntPtrT> *Data;
   const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
+  const RawInstrProf::VTableProfileData<IntPtrT> *VTableBegin = nullptr;
+  const RawInstrProf::VTableProfileData<IntPtrT> *VTableEnd = nullptr;
   const char *CountersStart;
   const char *CountersEnd;
   const char *NamesStart;
   const char *NamesEnd;
+  const char *VNamesStart = nullptr;
+  const char *VNamesEnd = nullptr;
   // After value profile is all read, this pointer points to
   // the header of next profile data (if exists)
   const uint8_t *ValueDataStart;
@@ -469,6 +473,46 @@ enum class HashT : uint32_t;
 
 } // end namespace IndexedInstrProf
 
+class InstrProfVTableLookupTrait {
+  char val;
+  IndexedInstrProf::HashT HashType;
+  unsigned FormatVersion;
+
+public:
+  InstrProfVTableLookupTrait(IndexedInstrProf::HashT HashType,
+                             unsigned FormatVersion)
+      : HashType(HashType), FormatVersion(FormatVersion) {}
+
+  using data_type = char;
+
+  using internal_key_type = StringRef;
+  using external_key_type = StringRef;
+
+  using hash_value_type = uint64_t;
+  using offset_type = uint64_t;
+
+  static bool EqualKey(StringRef A, StringRef B) { return A == B; }
+  static StringRef GetInternalKey(StringRef K) { return K; }
+  static StringRef GetExternalKey(StringRef K) { return K; }
+
+  hash_value_type ComputeHash(StringRef K);
+
+  static std::pair<offset_type, offset_type>
+  ReadKeyDataLength(const unsigned char *&D) {
+    using namespace support;
+
+    offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
+    offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  StringRef ReadKey(const unsigned char *D, offset_type N) {
+    return StringRef((const char *)D, N);
+  }
+
+  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
+};
+
 /// Trait for lookups into the on-disk hash table for the binary instrprof
 /// format.
 class InstrProfLookupTrait {
@@ -520,6 +564,9 @@ class InstrProfLookupTrait {
   }
 };
 
+using VirtualTableNamesHashTable =
+    OnDiskIterableChainedHashTable<InstrProfVTableLookupTrait>;
+
 struct InstrProfReaderIndexBase {
   virtual ~InstrProfReaderIndexBase() = default;
 
@@ -542,7 +589,10 @@ struct InstrProfReaderIndexBase {
   virtual bool hasMemoryProfile() const = 0;
   virtual bool hasTemporalProfile() const = 0;
   virtual InstrProfKind getProfileKind() const = 0;
-  virtual Error populateSymtab(InstrProfSymtab &) = 0;
+  // The pointer VirtualTableIndex is not owned.
+  virtual Error
+  populateSymtab(InstrProfSymtab &,
+                 VirtualTableNamesHashTable *VirtualTableIndex) = 0;
 };
 
 using OnDiskHashTableImplV3 =
@@ -617,7 +667,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
 
   InstrProfKind getProfileKind() const override;
 
-  Error populateSymtab(InstrProfSymtab &Symtab) override {
+  Error populateSymtab(InstrProfSymtab &Symtab,
+                       VirtualTableNamesHashTable *VirtualTableIndex) override {
+    if (VirtualTableIndex != nullptr)
+      return Symtab.create(HashTable->keys(), VirtualTableIndex->keys());
     return Symtab.create(HashTable->keys());
   }
 };
@@ -652,6 +705,8 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
   /// MemProf frame profile data on-disk indexed via frame id.
   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+  /// On-disk hash table of virtual table names.
+  std::unique_ptr<VirtualTableNamesHashTable> VirtualTableIndex = nullptr;
   /// Total size of binary ids.
   uint64_t BinaryIdsSize{0};
   /// Start address of binary id length and data pairs.
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index e50705ee053eea8..c38bc621469ede0 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -64,6 +64,9 @@ class InstrProfWriter {
   // List of binary ids.
   std::vector<llvm::object::BuildID> BinaryIds;
 
+  // Read the vtable names from raw instr profile reader.
+  StringSet<> VTableNames;
+
   // An enum describing the attributes of the profile.
   InstrProfKind ProfileKind = InstrProfKind::Unknown;
   // Use raw pointer here for the incomplete type object.
@@ -85,6 +88,7 @@ class InstrProfWriter {
   void addRecord(NamedInstrProfRecord &&I, function_ref<void(Error)> Warn) {
     addRecord(std::move(I), 1, Warn);
   }
+  void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); }
 
   /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the
   /// total number of temporal profiling traces the source has seen.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index cb0c055dcb74ae8..9af090c0c2e6825 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -56,12 +56,18 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
     }
   };
   DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
+  // Key is virtual table variable, value is 'VTableProfData' in the form of
+  // GlobalVariable.
+  DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
   /// If runtime relocation is enabled, this maps functions to the load
   /// instruction that produces the profile relocation bias.
   DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
   std::vector<GlobalValue *> CompilerUsedVars;
   std::vector<GlobalValue *> UsedVars;
   std::vector<GlobalVariable *> ReferencedNames;
+  // The list of virtual table variables of which the VTableProfData is
+  // collected.
+  std::vector<GlobalVariable *> ReferencedVTableNames;
   GlobalVariable *NamesVar;
   size_t NamesSize;
 
@@ -115,6 +121,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
   /// referring to them will also be created.
   GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc);
 
+  /// Get the counters for virtual table values, creating them if necessary.
+  void getOrCreateVTableProfData(GlobalVariable *GV);
+
   /// Create the region counters.
   GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
                                        GlobalValue::LinkageTypes Linkage);
@@ -122,6 +131,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
   /// Emit the section with compressed function names.
   void emitNameData();
 
+  /// Emit the section with compressed vtable names.
+  void emitVTableNames();
+
   /// Emit value nodes section for value profiling.
   void emitVNodes();
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 835dd697bc7b6aa..62a155b01e313a5 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -436,6 +436,18 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
       return E;
   }
+
+  SmallVector<MDNode *, 2> Types;
+  for (GlobalVariable &G : M.globals()) {
+    if (!G.hasName())
+      continue;
+    Types.clear();
+    G.getMetadata(LLVMContext::MD_type, Types);
+    if (!Types.empty()) {
+      // A global carrying !type metadata is recorded as a vtable, keyed by GUID.
+      MD5VTableMap.emplace_back(G.getGUID(), &G);
+    }
+  }
   Sorted = false;
   finalizeSymtab();
   return Error::success();
@@ -473,6 +485,26 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
   return Error::success();
 }
 
+uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
+  finalizeSymtab();
+  // VTableAddrToMD5Map holds a (start, hash) and an (end, hash) entry for
+  // every vtable registered through mapVTableAddress. Look up the first entry
+  // whose address is not less than Address.
+  // NOTE(review): partition_point needs the map sorted by address; this
+  // presumably relies on finalizeSymtab() doing so -- confirm.
+  auto It =
+      partition_point(VTableAddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
+        return A.first < Address;
+      });
+  // FIXME: Does the caveat about raw function pointers also apply here?
+  if (It != VTableAddrToMD5Map.end()) {
+    // NOTE(review): any Address at or below the largest recorded address
+    // yields a hash, including one that lies between two vtables -- intended?
+    return (uint64_t)It->second;
+  }
+  return 0;
+}
+
 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
   finalizeSymtab();
   auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
@@ -549,42 +581,112 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
       NameStrs, compression::zlib::isAvailable() && doCompression, Result);
 }
 
+/// Collect the names of \p VTables and encode them into \p Result by way of
+/// collectPGOFuncNameStrings. The names are compressed only when
+/// \p doCompression is set and zlib is available.
+Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
+                           std::string &Result, bool doCompression) {
+  std::vector<std::string> Names;
+  for (GlobalVariable *VT : VTables)
+    Names.push_back(VT->getName().str());
+  const bool Compress = compression::zlib::isAvailable() && doCompression;
+  return collectPGOFuncNameStrings(Names, Compress, Result);
+}
+
+/// Decode one name blob at \p Input: ULEB128 uncompressed size, ULEB128
+/// compressed size, then the payload. A nonzero compressed size sets
+/// \p isCompressed and inflates into \p UncompressedNameStrings; otherwise
+/// \p NameStrings views the raw payload. \p Dist counts the bytes consumed.
+instrprof_error decodeAndSplitStrings(
+    const uint8_t *Input, SmallVector<uint8_t, 128> &UncompressedNameStrings,
+    StringRef &NameStrings, uint32_t &Dist, bool &isCompressed) {
+  const uint8_t *Cursor = Input;
+  uint32_t FieldLen = 0;
+  const uint64_t UncompressedSize = decodeULEB128(Cursor, &FieldLen);
+  Cursor += FieldLen;
+  Dist = FieldLen;
+  const uint64_t CompressedSize = decodeULEB128(Cursor, &FieldLen);
+  Cursor += FieldLen;
+  Dist += FieldLen;
+  isCompressed = CompressedSize != 0;
+  if (!isCompressed) {
+    NameStrings =
+        StringRef(reinterpret_cast<const char *>(Cursor), UncompressedSize);
+    Dist += UncompressedSize;
+    return instrprof_error::success;
+  }
+  if (!llvm::compression::zlib::isAvailable())
+    return instrprof_error::zlib_unavailable;
+  if (Error E = compression::zlib::decompress(ArrayRef(Cursor, CompressedSize),
+                                              UncompressedNameStrings,
+                                              UncompressedSize)) {
+    consumeError(std::move(E));
+    return instrprof_error::uncompress_failed;
+  }
+  Dist += CompressedSize;
+  return instrprof_error::success;
+}
+
 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
   const uint8_t *P = NameStrings.bytes_begin();
   const uint8_t *EndP = NameStrings.bytes_end();
   while (P < EndP) {
-    uint32_t N;
-    uint64_t UncompressedSize = decodeULEB128(P, &N);
-    P += N;
-    uint64_t CompressedSize = decodeULEB128(P, &N);
-    P += N;
-    bool isCompressed = (CompressedSize != 0);
-    SmallVector<uint8_t, 128> UncompressedNameStrings;
+    // Decode one size-prefixed, possibly compressed chunk of names.
+    uint32_t Dist = 0;
     StringRef NameStrings;
+    SmallVector<uint8_t, 128> UncompressedNameStrings;
+    SmallVector<StringRef, 0> Names;
+    bool isCompressed = false;
+    instrprof_error E = decodeAndSplitStrings(P, UncompressedNameStrings,
+                                              NameStrings, Dist, isCompressed);
+    if (E != instrprof_error::success)
+      return make_error<InstrProfError>(E);
+
     if (isCompressed) {
-      if (!llvm::compression::zlib::isAvailable())
-        return make_error<InstrProfError>(instrprof_error::zlib_unavailable);
-
-      if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize),
-                                                  UncompressedNameStrings,
-                                                  UncompressedSize)) {
-        consumeError(std::move(E));
-        return make_error<InstrProfError>(instrprof_error::uncompress_failed);
-      }
-      P += CompressedSize;
       NameStrings = toStringRef(UncompressedNameStrings);
-    } else {
-      NameStrings =
-          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
-      P += UncompressedSize;
     }
+
+    NameStrings.split(Names, getInstrProfNameSeparator());
+    for (StringRef &Name : Names) {
+      if (Error E = Symtab.addFuncName(Name))
+        return E;
+    }
+
+    P += Dist;
+    // Skip any zero bytes (presumably padding) that follow the chunk.
+    while (P < EndP && *P == 0)
+      P++;
+  }
+  return Error::success();
+}
+
+Error readVTableNames(StringRef NameStrings, InstrProfSymtab &Symtab) {
+  const uint8_t *P = NameStrings.bytes_begin();
+  const uint8_t *EndP = NameStrings.bytes_end();
+  while (P < EndP) {
     // Now parse the name strings.
+    uint32_t Dist = 0;
+    StringRef NameStrings;
+    SmallVector<uint8_t, 128> UncompressedNameStrings;
     SmallVector<StringRef, 0> Names;
+    bool isCompressed = false;
+    instrprof_error E = decodeAndSplitStrings(P, UncompressedNameStrings,
+                                              NameStrings, Dist, isCompressed);
+    if (E != instrprof_error::success)
+      return make_error<InstrProfError>(E);
+
+    if (isCompressed) {
+      NameStrings = toStringRef(UncompressedNameStrings);
+    }
     NameStrings.split(Names, getInstrProfNameSeparator());
-    for (StringRef &Name : Names)
-      if (Error E = Symtab.addFuncName(Name))
+    for (StringRef &Name : Names) {
+      // printf("Read back vtable name %s\n", Name.str().c_str());
+      if (Error E = Symtab.addVTableName(Name))
         return E;
+    }
 
+    P += Dist;
+    // Skip padding?
     while (P < EndP && *P == 0)
       P++;
   }
@@ -841,6 +943,11 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
   if (ValueKind == IPVK_IndirectCallTarget)
     return SymTab->getFunctionHashFromAddress(Value);
 
+  if (ValueKind == IPVK_VTableTarget) {
+    uint64_t VTableHash = SymTab->getVTableHashFromAddress(Value);
+    return VTableHash;
+  }
+
   return Value;
 }
 
@@ -1232,8 +1339,8 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
   F.setMetadata(getPGOFuncNameMetadataName(), N);
 }
 
-bool needsComdatForCounter(const Function &F, const Module &M) {
-  if (F.hasComdat())
+bool needsComdatForCounter(const GlobalValue &GV, const Module &M) {
+  if (GV.hasComdat())
     return true;
 
   if (!Triple(M.getTargetTriple()).supportsCOMDAT())
@@ -1249,7 +1356,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
   // available_externally functions will end up being duplicated in raw profile
   // data. This can result in distorted profile as the counts of those dups
   // will be accumulated by the profile merger.
-  GlobalValue::LinkageTypes Linkage = F.getLinkage();
+  GlobalValue::LinkageTypes Linkage = GV.getLinkage();
   if (Linkage != GlobalValue::ExternalWeakLinkage &&
       Linkage != GlobalValue::AvailableExternallyLinkage)
     return false;
@@ -1413,6 +1520,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
     case IPVK_MemOPSize:
       strncpy(ProfileKindName, "MemOP", 19);
       break;
+    case IPVK_VTableTarget:
+      strncpy(ProfileKindName, "VTable", 19);
+      break;
     default:
       snprintf(ProfileKindName, 19, "VP[%d]", I);
       break;
@@ -1476,9 +1586,12 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 11ull:
+    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    [[fallthrough]];
   case 10ull:
     H.TemporalProfTracesOffset =
         read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
@@ -1502,10 +1615,13 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version11,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 11ull:
+    return offsetOf(&Header::VTableNamesOffset) +
+           sizeof(Header::VTableNamesOffset);
   case 10ull:
     return offsetOf(&Header::TemporalProfTracesOffset) +
            sizeof(Header::TemporalProfTracesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index db20441b712cd24..11951f20780c1eb 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -372,6 +372,14 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
               return E;
             Value = IndexedInstrProf::ComputeHash(VD.first);
           }
+        } else if (ValueKind == IPVK_VTableTarget) {
+          if (InstrProfSymtab::isExternalSymbol(VD.first)) {
+            Value = 0;
+          } else {
+            if (Error E = Symtab->addVTableName(VD.first))
+              return E;
+            Value = IndexedInstrProf::ComputeHash(VD.first);
+          }
         } else {
           READ_NUM(VD.first, Value);
         }
@@ -516,14 +524,31 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
 
 template <class IntPtrT>
 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
-  if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
+  if (Error E =
+          Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart),
+                        StringRef(VNamesStart, VNamesEnd - VNamesStart))) {
     return error(std::move(E));
+  }
   for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
     const IntPtrT FPtr = swap(I->FunctionPointer);
     if (!FPtr)
       continue;
     Symtab.mapAddress(FPtr, I->NameRef);
   }
+
+  if (VTableBegin != nullptr && VTableEnd != nullptr) {
+    for (const RawInstrProf::VTableProfileData<IntPtrT> *I = VTableBegin;
+         I != VTableEnd; ++I) {
+      const IntPtrT VPtr = I->VTablePointer;
+      if (!VPtr)
+        continue;
+      // Map both begin and end address to the name hash, since the instrumented
+      // address could be somewhere in the middle.
+      // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks
+      // the end of vtable address.
+      Symtab.mapVTableAddress(VPtr, VPtr + I->VTableSize, I->VTableNameHash);
+    }
+  }
   return success();
 }
 
@@ -555,17 +580,29 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
   auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize();
   auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
   auto NamesSize = swap(Header.NamesSize);
+  auto VTableNameSize = Header.VNamesSize;
+  auto NumVTables = Header.NumVTables;
   ValueKindLast = swap(Header.ValueKindLast);
 
   auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
-  auto PaddingSize = getNumPaddingBytes(NamesSize);
+  auto PaddingBytesAfterNames = getNumPaddingBytes(NamesSize);
+  auto PaddingBytesAfterVTableNames = getNumPaddingBytes(VTableNameSize);
+
+  auto VTableSectionSize =
+      Header.NumVTables * sizeof(RawInstrProf::VTableProfileData<IntPtrT>);
+  auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(VTableSectionSize);
 
   // Profile data starts after profile header and binary ids if exist.
   ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
   ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
   ptrdiff_t NamesOffset =
       CountersOffset + CountersSize + PaddingBytesAfterCounters;
-  ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
+  ptrdiff_t VTableProfDataOffset =
+      NamesOffset + NamesSize + PaddingBytesAfterNames;
+  ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize +
+                               PaddingBytesAfterVTableProfData;
+  ptrdiff_t ValueDataOffset =
+      VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames;
 
   auto *Start = reinterpret_cast<const char *>(&Header);
   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
@@ -584,8 +621,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
         Start + DataOffset);
     DataEnd = Data + NumData;
+    VTableBegin =
+        reinterpret_cast<const RawInstrProf::VTableProfileData<IntPtrT> *>(
+            Start + VTableProfDataOffset);
+    VTableEnd = VTableBegin + NumVTables;
     NamesStart = Start + NamesOffset;
     NamesEnd = NamesStart + NamesSize;
+    VNamesStart = Start + VTableNameOffset;
+    VNamesEnd = VNamesStart + VTableNameSize;
   }
 
   // Binary ids start just after the header.
@@ -831,6 +874,19 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
   return DataBuffer;
 }
 
+/// Hash vtable name \p K with this trait's HashType.
+auto InstrProfVTableLookupTrait::ComputeHash(StringRef K) -> hash_value_type {
+  return IndexedInstrProf::ComputeHash(HashType, K);
+}
+
+/// Read the single char stored at \p D; \p K and \p N are not consulted.
+InstrProfVTableLookupTrait::data_type
+InstrProfVTableLookupTrait::ReadData(StringRef K, const unsigned char *D,
+                                     offset_type N) {
+  return support::endian::readNext<char, support::little, support::unaligned>(
+      D);
+}
+
 template <typename HashTableImpl>
 Error InstrProfReaderIndex<HashTableImpl>::getRecords(
     StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
@@ -1150,6 +1206,21 @@ Error IndexedInstrProfReader::readHeader() {
                                         "corrupted binary ids");
   }
 
+  if (GET_VERSION(Header->formatVersion()) >= 11) {
+    uint64_t VTableNamesOffset =
+        endian::byte_swap<uint64_t, little>(Header->VTableNamesOffset);
+    const unsigned char *Ptr = Start + VTableNamesOffset;
+
+    const uint64_t HashTableMetadataOffset =
+        support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+
+    VirtualTableIndex.reset(VirtualTableNamesHashTable::Create(
+        /*Bucket=*/Start + HashTableMetadataOffset,
+        /*Payload=*/Ptr,
+        /*Base=*/Start,
+        InstrProfVTableLookupTrait(HashType, Header->formatVersion())));
+  }
+
   if (GET_VERSION(Header->formatVersion()) >= 10 &&
       Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
     uint64_t TemporalProfTracesOffset =
@@ -1204,7 +1275,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
     return *Symtab;
 
   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
-  if (Error E = Index->populateSymtab(*NewSymtab)) {
+  if (Error E = Index->populateSymtab(*NewSymtab, VirtualTableIndex.get())) {
     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
     consumeError(error(ErrCode, Msg));
   }
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index b74d5c3862d803d..786993153a8b376 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -170,6 +170,47 @@ class InstrProfRecordWriterTrait {
   }
 };
 
+/// Writer-side on-disk hash table trait for vtable names: each key is a name
+/// and each payload is a single char.
+class InstrProfRecordVTableTrait {
+public:
+  using key_type = StringRef;
+  using key_type_ref = StringRef;
+  using data_type = char;
+  using data_type_ref = char;
+  using hash_value_type = uint64_t;
+  using offset_type = uint64_t;
+
+  InstrProfRecordVTableTrait() = default;
+
+  /// Hash the vtable name via IndexedInstrProf::ComputeHash.
+  static hash_value_type ComputeHash(key_type_ref K) {
+    return IndexedInstrProf::ComputeHash(K);
+  }
+
+  /// Write the key length, then the fixed payload length of 1, each as a
+  /// little-endian offset_type, and return both lengths.
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    const offset_type KeyLen = K.size();
+    const offset_type DataLen = 1;
+    support::endian::Writer LE(Out, support::little);
+    LE.write<offset_type>(KeyLen);
+    LE.write<offset_type>(DataLen);
+    return {KeyLen, DataLen};
+  }
+
+  /// Write the first \p N bytes of key \p K.
+  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
+    Out.write(K.data(), N);
+  }
+
+  /// Write the one-char payload \p V.
+  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
+    Out.write(&V, 1);
+  }
+};
+
 } // end namespace llvm
 
 InstrProfWriter::InstrProfWriter(bool Sparse,
@@ -447,12 +488,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.MemProfOffset = 0;
   Header.BinaryIdOffset = 0;
   Header.TemporalProfTracesOffset = 0;
+  Header.VTableNamesOffset = 0;
   int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
 
   // Only write out all the fields except 'HashOffset', 'MemProfOffset',
-  // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the
-  // offset of these fields to allow back patching later.
-  for (int I = 0; I < N - 4; I++)
+  // 'BinaryIdOffset', `TemporalProfTracesOffset` and `VTableNamesOffset`. We
+  // need to remember the offset of these fields to allow back patching later.
+  for (int I = 0; I < N - 5; I++)
     OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
 
   // Save the location of Header.HashOffset field in \c OS.
@@ -476,6 +518,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t TemporalProfTracesOffset = OS.tell();
   OS.write(0);
 
+  uint64_t VTableNamesOffset = OS.tell();
+  OS.write(0);
+
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -589,6 +634,36 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       OS.writeByte(0);
   }
 
+  // if version >= the version with vtable profile metadata
+  // Intentionally put vtable names before temporal profile section.
+  uint64_t VTableNamesSectionStart = 0;
+  if (IndexedInstrProf::ProfVersion::CurrentVersion >= 11) {
+    VTableNamesSectionStart = OS.tell();
+
+    // Reserve space for vtable record table offset.
+    OS.write(0ULL);
+
+    OnDiskChainedHashTableGenerator<llvm::InstrProfRecordVTableTrait>
+        VTableNamesGenerator;
+    for (const auto &kv : VTableNames) {
+      // printf("InstrProfWriter.cpp key is %s\n", kv.getKey().str().c_str());
+      VTableNamesGenerator.insert(kv.getKey(), '0');
+    }
+
+    auto VTableNamesWriter =
+        std::make_unique<llvm::InstrProfRecordVTableTrait>();
+
+    uint64_t VTableNamesTableOffset =
+        VTableNamesGenerator.Emit(OS.OS, *VTableNamesWriter);
+
+    // printf("InstrProfWriter.cpp:VTableNamesSectionStart is %"PRIu64"\n",
+    // VTableNamesSectionStart); printf("\tVTableNamesTableOffset is
+    // %"PRIu64"\n", VTableNamesTableOffset);
+    PatchItem PatchItems[] = {
+        {VTableNamesSectionStart, &VTableNamesTableOffset, 1}};
+    OS.patch(PatchItems, 1);
+  }
+
   uint64_t TemporalProfTracesSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
     TemporalProfTracesSectionStart = OS.tell();
@@ -632,6 +707,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
       // traces).
       {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+      {VTableNamesOffset, &VTableNamesSectionStart, 1},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
        (int)(SummarySize / sizeof(uint64_t))},
@@ -684,7 +760,8 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
       std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
       DenseSet<uint64_t> SeenValues;
       for (uint32_t I = 0; I < ND; I++)
-        if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second)
+        if ((VK != IPVK_IndirectCallTarget && VK != IPVK_VTableTarget) &&
+            !SeenValues.insert(VD[I].Value).second)
           return make_error<InstrProfError>(instrprof_error::invalid_prof);
     }
   }
@@ -721,7 +798,7 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
       OS << ND << "\n";
       std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
       for (uint32_t I = 0; I < ND; I++) {
-        if (VK == IPVK_IndirectCallTarget)
+        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
           OS << Symtab.getFuncNameOrExternalSymbol(VD[I].Value) << ":"
              << VD[I].Count << "\n";
         else
@@ -758,6 +835,11 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
     }
   }
 
+  for (const auto &VTableName : VTableNames) {
+    if (Error E = Symtab.addVTableName(VTableName.getKey()))
+      return E;
+  }
+
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
     writeTextTemporalProfTraceData(OS, Symtab);
 
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index c90692980d86ac5..9c94df86e4bf923 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -559,6 +559,13 @@ bool InstrProfiling::run(
       static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
   }
 
+  for (GlobalVariable &GV : M.globals()) {
+    // Global variables with type metadata are virtual table variables.
+    if (GV.hasMetadata(LLVMContext::MD_type)) {
+      getOrCreateVTableProfData(&GV);
+    }
+  }
+
   for (Function &F : M)
     MadeChange |= lowerIntrinsics(&F);
 
@@ -572,6 +579,7 @@ bool InstrProfiling::run(
 
   emitVNodes();
   emitNameData();
+  emitVTableNames();
 
   // Emit runtime hook for the cases where the target does not unconditionally
   // require pulling in profile runtime, and coverage is enabled on code that is
@@ -977,6 +985,135 @@ InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
   return GV;
 }
 
+/// Decide whether the address of vtable \p GV may be stored in its profile
+/// data.
+///
+/// Nothing is recorded unless profDataReferencedByCode holds for the module.
+/// Otherwise the address is recorded for every vtable except one that has
+/// local linkage and sits in a COMDAT, which keeps profile data from
+/// referencing internal symbols in COMDAT.
+static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
+  if (!profDataReferencedByCode(*GV->getParent()))
+    return false;
+
+  // Linkage is irrelevant to the outcome apart from this one combination.
+  const bool IsLocalInComdat = GV->hasLocalLinkage() && GV->hasComdat();
+  return !IsLocalInComdat;
+}
+
+// FIXME: Does symbolic relocation from 'getFuncAddrForProfData' matter here?
+/// Return the i8* constant placed in the vtable's profile data: \p GV cast to
+/// i8* when shouldRecordVTableAddr(GV) holds, and a null i8* otherwise.
+static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
+  auto *Int8PtrTy = Type::getInt8PtrTy(GV->getContext());
+  // Store a nullptr in __profvt_ if a real address shouldn't be used.
+  if (shouldRecordVTableAddr(GV))
+    return ConstantExpr::getBitCast(GV, Int8PtrTy);
+  return ConstantPointerNull::get(Int8PtrTy);
+}
+
+/// Compose a profiling variable name: \p Prefix followed by the name of
+/// \p GV.
+static std::string getVarName(GlobalVariable *GV, StringRef Prefix) {
+  return (Prefix + GV->getName()).str();
+}
+
+/// Create the profile data variable for vtable \p GV, unless it is skipped or
+/// already has one, and record \p GV in ReferencedVTableNames.
+void InstrProfiling::getOrCreateVTableProfData(GlobalVariable *GV) {
+  assert(!DebugInfoCorrelate &&
+         "Value profiling is not supported with lightweight instrumentation");
+  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+    return;
+
+  if (GV->getName().starts_with("llvm.") ||
+      GV->getName().starts_with("__llvm") ||
+      GV->getName().starts_with("__prof"))
+    return;
+
+  // Nothing to do if profile data was already created for this vtable.
+  auto It = VTableDataMap.find(GV);
+  if (It != VTableDataMap.end() && It->second)
+    return;
+
+  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+  GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
+
+  // Keep consistent with per-function profile data, for correctness.
+  if (TT.isOSBinFormatXCOFF()) {
+    Linkage = GlobalValue::InternalLinkage;
+    Visibility = GlobalValue::DefaultVisibility;
+  }
+
+  LLVMContext &Ctx = M->getContext();
+  Type *DataTypes[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+
+  auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
+
+  // These locals are used by the INSTR_PROF_VTABLE_DATA macro below.
+  Constant *VTableAddr = getVTableAddrForProfData(GV);
+  StringRef VTableName = GV->getName();
+  // Record the length of the vtable. This is needed since vtable pointers
+  // loaded from C++ objects might be from the middle of a vtable definition.
+  uint32_t VTableSizeVal =
+      M->getDataLayout().getTypeAllocSize(GV->getValueType());
+
+  Constant *DataVals[] = {
+#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+
+  auto *Data =
+      new GlobalVariable(*M, DataTy, false /* constant */, Linkage,
+                         ConstantStruct::get(DataTy, DataVals),
+                         getVarName(GV, getInstrProfVTableVarPrefix()));
+
+  Data->setVisibility(Visibility);
+  Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
+  Data->setAlignment(Align(8));
+
+  const bool NeedComdat = needsComdatForCounter(*GV, *M);
+
+  // Put the per-vtable profile data (the lambda's own GV parameter) in a
+  // comdat group if the vtable definition is a COMDAT, so that only one copy
+  // of the variable is emitted after linking.
+  auto MaybeSetComdat = [&](GlobalVariable *GV, StringRef GroupName) {
+    bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
+    if (UseComdat) {
+      // Create a new comdat group using the name of the global variable as
+      // opposed to using the comdat group of the vtable.
+      Comdat *C = M->getOrInsertComdat(GroupName);
+      // For ELF, when not using COMDAT, put the vtable profile data into a
+      // nodeduplicate COMDAT which is lowered to a zero-flag section group.
+      // This allows -z start-stop-gc to discard the entire group when the
+      // vtable def is discarded.
+      if (!NeedComdat)
+        C->setSelectionKind(Comdat::NoDeduplicate);
+      GV->setComdat(C);
+      // COFF doesn't allow the comdat group leader to have private linkage, so
+      // upgrade private linkage to internal linkage to produce a symbol table
+      // entry.
+      if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) {
+        GV->setLinkage(GlobalValue::InternalLinkage);
+      }
+      return;
+    }
+  };
+
+  MaybeSetComdat(Data, Data->getName());
+
+  VTableDataMap[GV] = Data;
+
+  ReferencedVTableNames.push_back(GV);
+
+  // VTable <Hash, Addr> is used by runtime but not referenced by other
+  // sections. Conservatively mark it linker retained.
+  UsedVars.push_back(Data);
+}
+
 GlobalVariable *
 InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
   GlobalVariable *NamePtr = Inc->getName();
@@ -1264,6 +1401,34 @@ void InstrProfiling::emitNameData() {
     NamePtr->eraseFromParent();
 }
 
+/// Emit the names of all vtables in ReferencedVTableNames as one private
+/// constant global in the vtable-names section.
+/// The names are encoded by collectVTableStrings; a failure there is fatal.
+/// The new global is appended to UsedVars. Does nothing when the list is empty.
+void InstrProfiling::emitVTableNames() {
+  if (ReferencedVTableNames.empty())
+    return;
+
+  // Encode the collected vtable names into a single string.
+  std::string NameBlob;
+  if (Error Err = collectVTableStrings(ReferencedVTableNames, NameBlob,
+                                       DoInstrProfNameCompression))
+    report_fatal_error(Twine(toString(std::move(Err))), false);
+
+  Constant *Init = ConstantDataArray::getString(
+      M->getContext(), StringRef(NameBlob), false /* AddNull */);
+  auto *NamesGV = new GlobalVariable(
+      *M, Init->getType(), true /* constant */, GlobalValue::PrivateLinkage,
+      Init, getInstrProfVTableNamesVarName());
+  NamesGV->setSection(
+      getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
+  NamesGV->setAlignment(Align(1));
+  // Make VTableNames linker retained.
+  UsedVars.push_back(NamesGV);
+
+  // FIXME: Why emitNames call erase method?
+}
+
 void InstrProfiling::emitRegistration() {
   if (!needsRuntimeRegistrationOfSectionRange(TT))
     return;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 1fe9c57e550a412..3b2c78eebf40d9e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -585,6 +585,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
       NumOfPGOBB += MST.BBInfos.size();
       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
+      ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
     } else {
       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 3a129de1acd02d6..59d51a59c48b735 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -90,9 +90,33 @@ public:
   }
 };
 
+///------------------------ VTableProfilingPlugin ------------------------------
+/// Value-profiling plugin that reports findVTableAddrs results as candidates.
+class VTableProfilingPlugin {
+  Function &F;
+
+public:
+  static constexpr InstrProfValueKind Kind = IPVK_VTableTarget;
+
+  VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {}
+
+  /// Append one candidate per instruction from findVTableAddrs(F); its
+  /// insertion point is the next non-debug instruction that is not a PHI node.
+  void run(std::vector<CandidateInfo> &Candidates) {
+    for (Instruction *Addr : findVTableAddrs(F)) {
+      Instruction *InsertPt = Addr->getNextNonDebugInstruction();
+      while (InsertPt && isa<PHINode>(InsertPt))
+        InsertPt = InsertPt->getNextNonDebugInstruction();
+      assert(InsertPt);
+      Candidates.emplace_back(CandidateInfo{Addr, InsertPt, Addr});
+    }
+  }
+};
+
 ///----------------------- Registration of the plugins -------------------------
 /// For now, registering a plugin with the ValueProfileCollector is done by
 /// adding the plugin type to the VP_PLUGIN_LIST macro.
 #define VP_PLUGIN_LIST           \
     MemIntrinsicPlugin,          \
-    IndirectCallPromotionPlugin
+    IndirectCallPromotionPlugin, \
+    VTableProfilingPlugin 
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
new file mode 100644
index 000000000000000..1b35ea0303d15d2
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Rebuilds vtable_prof.profraw next to this script: compiles an instrumented
+# test program with the given clang++, runs it, and removes the build products.
+# Path expansions are quoted so that directories containing spaces work.
+if [ $# -lt 2 ]; then
+  echo "Path to clang++ and llvm-profdata required!"
+  echo "Usage: update_vtable_value_prof_inputs.sh /path/to/updated/clang++ /path/to/updated/llvm-profdata"
+  exit 1
+fi
+CLANG="$1"
+LLVMPROFDATA="$2" # Required by the usage above; not invoked by this script.
+
+OUTDIR="$(dirname "$(realpath -s "$0")")"
+echo "Outdir is $OUTDIR"
+
+cat > "${OUTDIR}/vtable_prof.cc" << EOF
+#include <cstdlib>
+#include <cstdio>
+
+class Base {
+ public:
+  virtual int func1(int a, int b) = 0;
+  virtual int func2(int a, int b) = 0;
+};
+
+class Derived1 : public Base {
+    public:
+    __attribute__((noinline))
+    int func1(int a, int b) override
+    {
+        return a + b;
+    }
+
+    __attribute__((noinline))
+    int func2(int a, int b) override {
+        return a * b;
+    }
+};
+
+class Derived2 : public Base {
+    public:
+    
+    __attribute__((noinline))
+    int func1(int a, int b) override {
+        return a - b;
+    }
+
+    __attribute__((noinline))
+    int func2(int a, int b) override {
+        return a * (a - b);
+    }
+};
+
+__attribute__((noinline)) Base* createType(int a) {
+    Base* base = nullptr;
+    if (a % 4 == 0)
+      base = new Derived1();
+    else
+      base = new Derived2();
+    return base;
+}
+
+
+int main(int argc, char** argv) {
+    int sum = 0;
+    for (int i = 0; i < 1000; i++) {
+        int a = rand();
+        int b = rand();
+        Base* ptr = createType(i);
+        sum += ptr->func1(a, b) + ptr->func2(b, a);
+    }
+    printf("sum is %d\n", sum);
+    return 0;
+}
+EOF
+
+FLAGS="-fuse-ld=lld -O2 -g -fprofile-generate=. -flto=thin -Xclang -fwhole-program-vtables -Wl,--lto-whole-program-visibility"
+# FLAGS stays unquoted below on purpose so it splits into separate options.
+"${CLANG}" ${FLAGS} "${OUTDIR}/vtable_prof.cc" -o "${OUTDIR}/vtable_prof"
+env LLVM_PROFILE_FILE="${OUTDIR}/vtable_prof.profraw" "${OUTDIR}/vtable_prof"
+
+rm "${OUTDIR}/vtable_prof"
+rm "${OUTDIR}/vtable_prof.cc"
+
diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
index 1c44a274f3c0471..990b246ce07a8c1 100644
--- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll
+++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
@@ -13,9 +13,9 @@ $foo = comdat any
 ; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
 ; CHECK-NOT: __profn__stdin__foo
 ; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
-; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
+; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [3 x i16] } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
 ; CHECK-NOT: @foo
-; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
+; CHECK-SAME: , ptr null, i32 1, [3 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
 ; CHECK: @__llvm_prf_nm
 ; CHECK: @llvm.compiler.used
 
diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll b/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
index 42018e4d5e54dd2..63173ab59b28ba2 100644
--- a/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
@@ -54,11 +54,12 @@ try.cont:                                         ; preds = %catch, %entry
 }
 
 ; GEN: catch:
-; GEN: call void @llvm.instrprof.value.profile(
+; GEN: call void @llvm.instrprof.value.profile({{.*}} i32 2, i32 0)
+; GEN: call void @llvm.instrprof.value.profile({{.*}} i32 0, i32 0)
 ; GEN-SAME: [ "funclet"(token %tmp1) ]
 
 ; LOWER: catch:
-; LOWER: call void @__llvm_profile_instrument_target(
+; LOWER: call void @__llvm_profile_instrument_target({{.*}} i32 0)
 ; LOWER-SAME: [ "funclet"(token %tmp1) ]
 
 declare dso_local void @"?may_throw@@YAXH@Z"(i32)
diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
new file mode 100644
index 000000000000000..0c6bd3c064d35fc
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
@@ -0,0 +1,139 @@
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s --check-prefix=LOWER
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$_ZTV7Derived = comdat any
+
+@_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI7Derived, ptr @_ZN5Base15func1Eii], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI7Derived, ptr @_ZN5Base25func2Eii] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5, !type !6, !type !7, !type !8
+@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global [0 x ptr]
+@_ZTS7Derived = constant [9 x i8] c"7Derived\00", align 1
+@_ZTI5Base1 = external constant ptr
+@_ZTI5Base2 = external constant ptr
+@_ZTI7Derived =  constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), ptr @_ZTS7Derived, i32 0, i32 2, ptr @_ZTI5Base1, i64 2, ptr @_ZTI5Base2, i64 2050 }, align 8
+@_ZTV5Base1 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base1, ptr @_ZN5Base15func1Eii] }, align 8, !type !0, !type !1
+@_ZTV5Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base2, ptr @_ZN5Base25func2Eii] }, align 8, !type !9, !type !4
+@llvm.compiler.used = appending global [2 x ptr] [ptr @_ZTV5Base1, ptr @_ZTV5Base2], section "llvm.metadata"
+
+declare ptr @_Z10createTypei(i32)
+declare i32 @_ZN5Base15func1Eii(ptr, i32, i32)
+declare i32 @_ZN5Base25func2Eii(ptr, i32, i32)
+
+; GEN: @__llvm_profile_raw_version = hidden constant i64 72057594037927945, comdat
+; GEN: @__profn_test_vtable_value_profiling = private constant [27 x i8] c"test_vtable_value_profiling"
+
+; LOWER: $__profvt__ZTV7Derived = comdat any
+; LOWER: $__profvt__ZTV5Base1 = comdat nodeduplicate
+; LOWER: $__profvt__ZTV5Base2 = comdat nodeduplicate
+; LOWER: @__llvm_profile_raw_version = hidden constant i64 72057594037927945, comdat
+; LOWER: @__profc_test_vtable_value_profiling = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
+; LOWER: @__profvp_test_vtable_value_profiling = private global [4 x i64] zeroinitializer, section "__llvm_prf_vals", comdat($__profc_test_vtable_value_profiling), align 8
+; LOWER: @__profd_test_vtable_value_profiling = private global { i64, i64, i64, ptr, ptr, i32, [3 x i16] } { i64 1593873508557585901, i64 567090795815895039, i64 sub (i64 ptrtoint (ptr @__profc_test_vtable_value_profiling to i64), i64 ptrtoint (ptr @__profd_test_vtable_value_profiling to i64)), ptr @test_vtable_value_profiling.local, ptr @__profvp_test_vtable_value_profiling, i32 1, [3 x i16] [i16 2, i16 0, i16 2] }, section "__llvm_prf_data", comdat($__profc_test_vtable_value_profiling), align 8
+; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @__profvt__ZTV5Base1 = global { i64, ptr, i32 } { i64 3215870116411581797, ptr @_ZTV5Base1, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @__profvt__ZTV5Base2 = global { i64, ptr, i32 } { i64 8378219803387680050, ptr @_ZTV5Base2, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @__llvm_prf_vnodes = private global [10 x { i64, i64, ptr }] zeroinitializer, section "__llvm_prf_vnds", align 8
+; LOWER: @__llvm_prf_nm = private constant [37 x i8] c"\1B#x\DA+I-.\89/+IL\CAI\8D/K\CC)M\8D/(\CAO\CB\CC\C9\CCK\07\00\9Ea\0BC", section "__llvm_prf_names", align 1
+; LOWER: @__llvm_prf_vnm = private constant [34 x i8] c"\22 x\DA\8B\8F\0A\093wI-\CA,KMa\8C\07rL\9D\12\8BS\0D\11L#\00\C3\A2\0A\E9", section "__llvm_prf_vnames", align 1
+; LOWER: @llvm.used = appending global [6 x ptr] [ptr @__profvt__ZTV7Derived, ptr @__profvt__ZTV5Base1, ptr @__profvt__ZTV5Base2, ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata"
+
+define i32 @test_vtable_value_profiling(i32 %a, i32 %b, i32 %c) {
+; GEN-LABEL: define i32 @test_vtable_value_profiling(
+; GEN-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; GEN-NEXT:  entry:
+; GEN-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i32 1, i32 0)
+; GEN-NEXT:    [[CALL:%.*]] = tail call ptr @_Z10createTypei(i32 [[C]])
+; GEN-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 8
+; GEN-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[ADD_PTR]], align 8
+; GEN-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP0]], i32 2, i32 0)
+; GEN-NEXT:    [[VFUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; GEN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[VFUNC]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP1]], i32 0, i32 0)
+; GEN-NEXT:    [[CALL1:%.*]] = tail call i32 [[VFUNC]](ptr [[ADD_PTR]], i32 [[A]], i32 [[B]])
+; GEN-NEXT:    [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8
+; GEN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP2]], i32 2, i32 1)
+; GEN-NEXT:    [[VFUNC2:%.*]] = load ptr, ptr [[VTABLE2]], align 8
+; GEN-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP3]], i32 0, i32 1)
+; GEN-NEXT:    [[CALL4:%.*]] = tail call i32 [[VFUNC2]](ptr [[CALL]], i32 [[B]], i32 [[A]])
+; GEN-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL4]], [[CALL1]]
+; GEN-NEXT:    ret i32 [[ADD]]
+;
+; LOWER-LABEL: define i32 @test_vtable_value_profiling(
+; LOWER-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; LOWER-NEXT:  entry:
+; LOWER-NEXT:    [[PGOCOUNT:%.*]] = load i64, ptr @__profc_test_vtable_value_profiling, align 8
+; LOWER-NEXT:    [[TMP0:%.*]] = add i64 [[PGOCOUNT]], 1
+; LOWER-NEXT:    store i64 [[TMP0]], ptr @__profc_test_vtable_value_profiling, align 8
+; LOWER-NEXT:    [[CALL:%.*]] = tail call ptr @_Z10createTypei(i32 [[C]])
+; LOWER-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 8
+; LOWER-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[ADD_PTR]], align 8
+; LOWER-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP1]], ptr @__profd_test_vtable_value_profiling, i32 2)
+; LOWER-NEXT:    [[VFUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; LOWER-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[VFUNC]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP2]], ptr @__profd_test_vtable_value_profiling, i32 0)
+; LOWER-NEXT:    [[CALL1:%.*]] = tail call i32 [[VFUNC]](ptr [[ADD_PTR]], i32 [[A]], i32 [[B]])
+; LOWER-NEXT:    [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8
+; LOWER-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP3]], ptr @__profd_test_vtable_value_profiling, i32 3)
+; LOWER-NEXT:    [[VFUNC2:%.*]] = load ptr, ptr [[VTABLE2]], align 8
+; LOWER-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP4]], ptr @__profd_test_vtable_value_profiling, i32 1)
+; LOWER-NEXT:    [[CALL4:%.*]] = tail call i32 [[VFUNC2]](ptr [[CALL]], i32 [[B]], i32 [[A]])
+; LOWER-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL4]], [[CALL1]]
+; LOWER-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %call = tail call ptr @_Z10createTypei(i32 %c)
+  %add.ptr = getelementptr inbounds i8, ptr %call, i64 8
+  %vtable = load ptr, ptr %add.ptr, align 8
+  %vfunc = load ptr, ptr %vtable, align 8
+  %call1 = tail call i32 %vfunc(ptr %add.ptr, i32 %a, i32 %b)
+  %vtable2 = load ptr, ptr %call, align 8
+  %vfunc2 = load ptr, ptr %vtable2, align 8
+  %call4 = tail call i32 %vfunc2(ptr %call, i32 %b, i32 %a)
+  %add = add nsw i32 %call4, %call1
+  ret i32 %add
+}
+
+!0 = !{i64 16, !"_ZTS5Base1"}
+!1 = !{i64 16, !"_ZTSM5Base1FiiiE.virtual"}
+!2 = !{i64 40, !"_ZTSM5Base1FiiiE.virtual"}
+!3 = !{i64 40, !"_ZTS5Base2"}
+!4 = !{i64 16, !"_ZTSM5Base2FiiiE.virtual"}
+!5 = !{i64 40, !"_ZTSM5Base2FiiiE.virtual"}
+!6 = !{i64 16, !"_ZTS7Derived"}
+!7 = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+!8 = !{i64 40, !"_ZTSM7DerivedFiiiE.virtual"}
+!9 = !{i64 16, !"_ZTS5Base2"}
+;.
+; GEN: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; LOWER: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; GEN: [[META0:![0-9]+]] = !{i64 16, !"_ZTS5Base1"}
+; GEN: [[META1:![0-9]+]] = !{i64 16, !"_ZTSM5Base1FiiiE.virtual"}
+; GEN: [[META2:![0-9]+]] = !{i64 40, !"_ZTSM5Base1FiiiE.virtual"}
+; GEN: [[META3:![0-9]+]] = !{i64 40, !"_ZTS5Base2"}
+; GEN: [[META4:![0-9]+]] = !{i64 16, !"_ZTSM5Base2FiiiE.virtual"}
+; GEN: [[META5:![0-9]+]] = !{i64 40, !"_ZTSM5Base2FiiiE.virtual"}
+; GEN: [[META6:![0-9]+]] = !{i64 16, !"_ZTS7Derived"}
+; GEN: [[META7:![0-9]+]] = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+; GEN: [[META8:![0-9]+]] = !{i64 40, !"_ZTSM7DerivedFiiiE.virtual"}
+; GEN: [[META9:![0-9]+]] = !{i64 16, !"_ZTS5Base2"}
+;.
+; LOWER: [[META0:![0-9]+]] = !{i64 16, !"_ZTS5Base1"}
+; LOWER: [[META1:![0-9]+]] = !{i64 16, !"_ZTSM5Base1FiiiE.virtual"}
+; LOWER: [[META2:![0-9]+]] = !{i64 40, !"_ZTSM5Base1FiiiE.virtual"}
+; LOWER: [[META3:![0-9]+]] = !{i64 40, !"_ZTS5Base2"}
+; LOWER: [[META4:![0-9]+]] = !{i64 16, !"_ZTSM5Base2FiiiE.virtual"}
+; LOWER: [[META5:![0-9]+]] = !{i64 40, !"_ZTSM5Base2FiiiE.virtual"}
+; LOWER: [[META6:![0-9]+]] = !{i64 16, !"_ZTS7Derived"}
+; LOWER: [[META7:![0-9]+]] = !{i64 16, !"_ZTSM7DerivedFiiiE.virtual"}
+; LOWER: [[META8:![0-9]+]] = !{i64 40, !"_ZTSM7DerivedFiiiE.virtual"}
+; LOWER: [[META9:![0-9]+]] = !{i64 16, !"_ZTS5Base2"}
+;.
diff --git a/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw b/llvm/test/tools/llvm-profdata/Inputs/c-general.profraw
index bc8fc5db1cb154d98ca962e84313463e3298cb92..a065c4dd7b9ce20f631c1228147c97dc46dae8bb 100644
GIT binary patch
delta 184
zcmeC+`@zTASd^AuRFLzZb0Vib$GLNibNIp-7$!F`Nlc6|nB2g^F|k8{QDNdtc}9kb
z8^sv|CY}Vb9*Q$2O#BICO%`MnXS^|4lM%#r1hOqAM}pafKz7IEPB42Rqd3!nzY}N4
z0o7b&6zBZ#_dgggOg_jgKbeC`fa${DiCJ<W+a_Bu32^R!%NGLWGnfQ`CRH-YPmW*`
K*t~!#g9QKz%0Qq1

delta 226
zcmeyt*TKixSd^AuRFLzZV<M+LCwGqn0|+oqOf(m4Udy`lqjO(I&}GvIF&jY_jforO
zC*BZXOqjS)d=evvVCa+zP1gCUanf@<V@;ADUZ|L?$S6PAgHeF-2T;Z20-zFmUzxmw
zcE{>#|HF2)MP+VFm^=}vWCu{m4j at Z>@&llnuTyPVyaQr|<1ZS8=|7iS^J6k2ll){2
zCIKdcf0M14<b-NpJU>z}?X}aSS0831wSTCP_&2diZt at JE0*-%^S24*6MxEwya$o3I
Va?uU!c!md)A2P{r=3p*h0RV;ZQW5|F

diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-prof.proftext
new file mode 100644
index 000000000000000..bf0ad6beb9825b8
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/vtable-prof.proftext
@@ -0,0 +1,73 @@
+# IR level Instrumentation Flag
+:ir
+_Z10createTypei
+# Func Hash:
+146835647075900052
+# Num Counters:
+2
+# Counter Values:
+750
+250
+
+_ZN8Derived15func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+_ZN8Derived15func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+250
+
+_ZN8Derived25func1Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+_ZN8Derived25func2Eii
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+750
+
+main
+# Func Hash:
+1124236338992350536
+# Num Counters:
+2
+# Counter Values:
+1000
+1
+# Num Value Kinds:
+2
+# ValueKind = IPVK_IndirectCallTarget:
+0
+# NumValueSites:
+2
+2
+_ZN8Derived25func1Eii:750
+_ZN8Derived15func1Eii:250
+2
+_ZN8Derived25func2Eii:750
+_ZN8Derived15func2Eii:250
+# ValueKind = IPVK_VTableTarget:
+2
+# NumValueSites:
+2
+2
+_ZTV8Derived2:750
+_ZTV8Derived1:250
+2
+_ZTV8Derived2:750
+_ZTV8Derived1:250
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable_prof.profraw b/llvm/test/tools/llvm-profdata/Inputs/vtable_prof.profraw
new file mode 100644
index 0000000000000000000000000000000000000000..bc52463abfb388a4bb7873ebc503726d0ef78ccd
GIT binary patch
literal 816
zcmZoHO3N=Q$obF700xW at ih+R*#(>fsXncDp|G<9;XkF}No*c at +AOjU=LZhQ+^;o+Z
z?TnjpE6Bob6(<u+Ux0=)SRc%Im_8W)*VW$VW+_u|d$d~S{u7(UKLcvt1em_f5d9#1
zFbvY$H&IVgWp>j9pz`;ZxqP__VEQys?Sp$5YT-hLgPJ$_ at 7;vybAakwKtSKa@@n4Z
zZ>;?=eF9K at 8wlu|{As3#rP1CVn7#-9AQ}%4(Dx1+P`{w=e!&c3!2AOfhtV+gFgnM!
z;#PORk%@<%m#>Z|?@AqyvngkM&zwo_>HoC3z*F~>j<>I&iLdrqZ?lrKXO4)NItiN|
zT6K+MofyNli7Go7j<4KY|LlfI(+?X+cq%~MF- at h$YkkyXR|i{EacL=#eO#Pyvr~H9
zA4xJY)N64ng0!vxVp!aR;}Oc401XG2KVabr^9L+mVDhkVgUN%<0+|gpj|1v1nEC)H
JAEus=JOHD*$_M}e

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
index 67db5c98ef323a7..891aac0be4c6033 100644
--- a/llvm/test/tools/llvm-profdata/binary-ids-padding.test
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -8,10 +8,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 // There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes.
 //   2 * 8  binary ID sizes
 // + 2 * 20 binary IDs (of size 20)
@@ -27,6 +29,8 @@ RUN: printf '\20\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Binary IDs - There are only two in this case that are 20 bytes.
 RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
@@ -52,13 +56,15 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
-RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t.profraw
+RUN: printf '\xd0\xff\3\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\067\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
index 2394431e94de482..3362186e1011583 100644
--- a/llvm/test/tools/llvm-profdata/large-binary-id-size.test
+++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -9,6 +9,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Check for a corrupted size being too large past the end of the file.
 RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
index 06f418d0235d260..bfcf91a36f9403d 100644
--- a/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
+++ b/llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test
@@ -8,10 +8,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -21,6 +23,8 @@ RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
@@ -36,6 +40,7 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\3\0foo\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
index b718cf0fd8e9723..70ddca3dfd7de07 100644
--- a/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
+++ b/llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test
@@ -8,10 +8,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -21,6 +23,8 @@ RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
@@ -37,6 +41,7 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 // Make NumCounters = 0 so that we get "number of counters is zero" error message
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\3\0foo\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
index 38e40334a6a690d..54f861e08970da1 100644
--- a/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
+++ b/llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test
@@ -8,10 +8,12 @@
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
 // INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
 // INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, VNamesSize, VNamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NumVTables, NumVTables)
 // INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
 
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -21,6 +23,8 @@ RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\6\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\6\0\2\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Data Section
 //
@@ -42,6 +46,7 @@ RUN: printf '\11\0\6\0\1\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Counter Section
 
diff --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
index 171b5cc60878f4c..489e06468ac7d82 100644
--- a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
+++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t.profraw
 // We should fail on this because the binary IDs is not a multiple of 8 bytes.
 RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
@@ -10,6 +10,8 @@ RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
 
 // Binary IDs - There are only two in this case that are 20 bytes.
 RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
diff --git a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
index 24f3f563e9689d6..0c9c046682ded84 100644
--- a/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
+++ b/llvm/test/tools/llvm-profdata/mismatched-raw-profile-header.test
@@ -12,6 +12,8 @@ RUN: printf '\0\0\0\0\0\0\0\20' >> %t
 RUN: printf '\0\0\0\1\0\4\0\0' >> %t
 RUN: printf '\0\0\0\2\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: not llvm-profdata show %t -o /dev/null 2>&1 | FileCheck %s
 
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
index c8e862009ef0284..f7ded4b2ed82497 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-be.test
@@ -1,5 +1,6 @@
+// Header
 RUN: printf '\377lprofR\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\10' >> %t
+RUN: printf '\0\0\0\0\0\0\0\11' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\2' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -9,6 +10,10 @@ RUN: printf '\0\0\0\0\0\0\0\20' >> %t
 RUN: printf '\0\0\0\0\1\0\0\0' >> %t
 RUN: printf '\0\0\0\0\2\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumVTables
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// ValueKindLast
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\134\370\302\114\333\030\275\254' >> %t
 RUN: printf '\0\0\0\0\0\0\0\1' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
index 523ff1ceb4807a3..2d95f5ba9cbd979 100644
--- a/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-32-bits-le.test
@@ -1,5 +1,5 @@
 RUN: printf '\201Rforpl\377' > %t
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\2\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -9,6 +9,10 @@ RUN: printf '\20\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1\0\0\0\0' >> %t
 RUN: printf '\0\0\0\2\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumVTables
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// ValueKindLast
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
index b2b8b31dafbf5ac..05ca0cf98d7beb2 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-be.test
@@ -1,5 +1,6 @@
+// Header
 RUN: printf '\377lprofr\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\10' >> %t
+RUN: printf '\0\0\0\0\0\0\0\11' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\2' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
@@ -9,20 +10,28 @@ RUN: printf '\0\0\0\0\0\0\0\20' >> %t
 RUN: printf '\0\0\0\1\0\4\0\0' >> %t
 RUN: printf '\0\0\0\2\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumVTables
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// ValueKindLast
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
+// InstrProfData
 RUN: printf '\134\370\302\114\333\030\275\254' >> %t
 RUN: printf '\0\0\0\0\0\0\0\1' >> %t
 RUN: printf '\0\0\0\1\0\4\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\1\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
+// InstrProfData
 RUN: printf '\344\023\165\112\031\035\265\067' >> %t
 RUN: printf '\0\0\0\0\0\0\0\02' >> %t
-RUN: printf '\0\0\0\1\0\3\xff\xd8' >> %t
+RUN: printf '\0\0\0\1\0\3\xff\xd0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\02\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\0\0\0\0\0\0\0\023' >> %t
 RUN: printf '\0\0\0\0\0\0\0\067' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
index 4e95798bc0afbda..d8e12ef705a2582 100644
--- a/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
+++ b/llvm/test/tools/llvm-profdata/raw-64-bits-le.test
@@ -1,28 +1,53 @@
 RUN: printf '\201rforpl\377' > %t
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t
+// Version
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t
+// BinaryIdsSize
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumData
 RUN: printf '\2\0\0\0\0\0\0\0' >> %t
+// PaddingBytesBeforeCounters
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumCounters
 RUN: printf '\3\0\0\0\0\0\0\0' >> %t
+// PaddingBytesAfterCounters
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NamesSize
 RUN: printf '\20\0\0\0\0\0\0\0' >> %t
+// CountersDelta
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t
+// NamesDelta
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t
+// VNamesSize
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumVTables
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// ValueKindLast
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
+// InstrProfData
+// NameRef
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t
+// FuncHash
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t
+// RelativeCounterPtr
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t
+// FunctionPointer
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// Values
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+// NumCounters + Int16ArrayTy
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t
+// Int16ArrayTy + padding
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
+// InstrProfData
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t
-RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t
+RUN: printf '\xd0\xff\3\0\1\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t
 RUN: printf '\067\0\0\0\0\0\0\0' >> %t
diff --git a/llvm/test/tools/llvm-profdata/raw-two-profiles.test b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
index 8d46c91e2732cd9..5038b6af9232d39 100644
--- a/llvm/test/tools/llvm-profdata/raw-two-profiles.test
+++ b/llvm/test/tools/llvm-profdata/raw-two-profiles.test
@@ -1,5 +1,5 @@
 RUN: printf '\201rforpl\377' > %t-foo.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -9,6 +9,8 @@ RUN: printf '\10\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 
 RUN: printf '\254\275\030\333\114\302\370\134' >> %t-foo.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
@@ -16,12 +18,13 @@ RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
 
 RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw
 RUN: printf '\3\0foo\0\0\0' >> %t-foo.profraw
 
 RUN: printf '\201rforpl\377' > %t-bar.profraw
-RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\11\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
@@ -31,6 +34,8 @@ RUN: printf '\10\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 
 RUN: printf '\067\265\035\031\112\165\023\344' >> %t-bar.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw
@@ -38,6 +43,7 @@ RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\02\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
 
 RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw
 RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw
diff --git a/llvm/test/tools/llvm-profdata/vtable-prof.proftext b/llvm/test/tools/llvm-profdata/vtable-prof.proftext
new file mode 100644
index 000000000000000..6ebe9b44266b6f2
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-prof.proftext
@@ -0,0 +1,16 @@
+RUN: llvm-profdata show --function=main --show-vtables %p/Inputs/vtable-prof.proftext | FileCheck %s --check-prefix=VTABLE
+
+VTABLE: Counters:
+VTABLE:  main:
+VTABLE:    Hash: 0x0f9a16fe6d398548
+VTABLE:    Counters: 2
+VTABLE:    VTable Results:
+VTABLE:	       [  0, _ZTV8Derived2,        750 ] (75.00%)
+VTABLE:	       [  0, _ZTV8Derived1,        250 ] (25.00%)
+VTABLE:	       [  1, _ZTV8Derived2,        750 ] (75.00%)
+VTABLE:	       [  1, _ZTV8Derived1,        250 ] (25.00%)
+VTABLE: Instrumentation level: IR  entry_first = 0
+VTABLE: Functions shown: 1
+VTABLE: Total functions: 6
+VTABLE: Maximum function count: 1000
+VTABLE: Maximum internal block count: 250
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
new file mode 100644
index 000000000000000..9dd7fa288969075
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof-basic.test
@@ -0,0 +1,100 @@
+To update the inputs used below, run Inputs/update_vtable_value_prof_inputs.sh /path/to/updated/clang++ /path/to/updated/llvm-profdata
+
+Show profile data from raw profiles.
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables %p/Inputs/vtable_prof.profraw | FileCheck %s --check-prefix=RAW
+
+Generate indexed profile from raw profile and show the data.
+RUN: llvm-profdata merge %p/Inputs/vtable_prof.profraw -o %t_indexed.profdata
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t_indexed.profdata | FileCheck %s --check-prefix=INDEXED
+
+Generate text profile from raw profile and show the data.
+RUN: llvm-profdata merge --text %p/Inputs/vtable_prof.profraw -o %t.proftext
+RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t.proftext | FileCheck %s --check-prefix=ICTEXT 
+
+RAW: Counters:
+RAW-NEXT:  main:
+RAW-NEXT:  Hash: 0x0f9a16fe6d398548
+RAW-NEXT:  Counters: 2
+RAW-NEXT:  Indirect Call Site Count: 2
+RAW-NEXT:  VTable Results:
+RAW-NEXT:	     [  0, _ZTV8Derived1,        250 ] (25.00%)
+RAW-NEXT:      [  0, _ZTV8Derived2,        750 ] (75.00%)
+RAW-NEXT:      [  1, _ZTV8Derived1,        250 ] (25.00%)
+RAW-NEXT:	     [  1, _ZTV8Derived2,        750 ] (75.00%)
+RAW-NEXT:  Indirect Target Results:
+RAW-NEXT:	     [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
+RAW-NEXT:	     [  0, _ZN8Derived25func1Eii,        750 ] (75.00%)
+RAW-NEXT:	     [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
+RAW-NEXT:	     [  1, _ZN8Derived25func2Eii,        750 ] (75.00%)
+RAW-NEXT: Instrumentation level: IR  entry_first = 0
+RAW-NEXT: Functions shown: 1
+RAW-NEXT: Total functions: 6
+RAW-NEXT: Maximum function count: 1000
+RAW-NEXT: Maximum internal block count: 250
+RAW-NEXT: Statistics for indirect call sites profile:
+RAW-NEXT:   Total number of sites: 2
+RAW-NEXT:   Total number of sites with values: 2
+RAW-NEXT:   Total number of profiled values: 4
+RAW-NEXT:   Value sites histogram:
+RAW-NEXT:         NumTargets, SiteCount
+RAW-NEXT:         2, 2
+
+INDEXED:      Counters:
+INDEXED-NEXT:   main:
+INDEXED-NEXT:     Hash: 0x0f9a16fe6d398548
+INDEXED-NEXT:     Counters: 2
+INDEXED-NEXT:     Indirect Call Site Count: 2
+INDEXED-NEXT:     VTable Results:
+INDEXED-NEXT:	      [  0, _ZTV8Derived2,        750 ] (75.00%)
+INDEXED-NEXT:     	  [  0, _ZTV8Derived1,        250 ] (25.00%)
+INDEXED-NEXT:         [  1, _ZTV8Derived2,        750 ] (75.00%)
+INDEXED-NEXT:         [  1, _ZTV8Derived1,        250 ] (25.00%)
+INDEXED-NEXT:     Indirect Target Results:
+INDEXED-NEXT:         [  0, _ZN8Derived25func1Eii,        750 ] (75.00%)
+INDEXED-NEXT:         [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
+INDEXED-NEXT:         [  1, _ZN8Derived25func2Eii,        750 ] (75.00%)
+INDEXED-NEXT:         [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
+INDEXED-NEXT: Instrumentation level: IR  entry_first = 0
+INDEXED-NEXT: Functions shown: 1
+INDEXED-NEXT: Total functions: 6
+INDEXED-NEXT: Maximum function count: 1000
+INDEXED-NEXT: Maximum internal block count: 250
+INDEXED-NEXT: Statistics for indirect call sites profile:
+INDEXED-NEXT:   Total number of sites: 2
+INDEXED-NEXT:   Total number of sites with values: 2
+INDEXED-NEXT:   Total number of profiled values: 4
+INDEXED-NEXT:   Value sites histogram:
+INDEXED-NEXT:       NumTargets, SiteCount
+INDEXED-NEXT:       2, 2
+
+ICTEXT: :ir
+ICTEXT: main
+ICTEXT: # Func Hash:
+ICTEXT: 1124236338992350536
+ICTEXT: # Num Counters:
+ICTEXT: 2
+ICTEXT: # Counter Values:
+ICTEXT: 1000
+ICTEXT: 1
+ICTEXT: # Num Value Kinds:
+ICTEXT: 2
+ICTEXT: # ValueKind = IPVK_IndirectCallTarget:
+ICTEXT: 0
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: _ZN8Derived25func1Eii:750
+ICTEXT: _ZN8Derived15func1Eii:250
+ICTEXT: 2
+ICTEXT: _ZN8Derived25func2Eii:750
+ICTEXT: _ZN8Derived15func2Eii:250
+ICTEXT: # ValueKind = IPVK_VTableTarget:
+ICTEXT: 2
+ICTEXT: # NumValueSites:
+ICTEXT: 2
+ICTEXT: 2
+ICTEXT: _ZTV8Derived2:750
+ICTEXT: _ZTV8Derived1:250
+ICTEXT: 2
+ICTEXT: _ZTV8Derived2:750
+ICTEXT: _ZTV8Derived1:250
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index fdb2c1405f1237f..18fa08e46214c33 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -347,6 +347,13 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     });
   }
 
+  const InstrProfSymtab &symtab = Reader->getSymtab();
+  const auto &VTableNames = symtab.getVTableNames();
+
+  for (const auto &kv : VTableNames) {
+    WC->Writer.addVTableName(kv.getKey());
+  }
+
   if (Reader->hasTemporalProfile()) {
     auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
     if (!Traces.empty())
@@ -2413,12 +2420,12 @@ static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
 
 static int showInstrProfile(
     const std::string &Filename, bool ShowCounts, uint32_t TopN,
-    bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary,
-    std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions,
-    bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow,
-    const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds,
-    bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces,
-    ShowFormat SFormat, raw_fd_ostream &OS) {
+    bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowVTables,
+    bool ShowDetailedSummary, std::vector<uint32_t> DetailedSummaryCutoffs,
+    bool ShowAllFunctions, bool ShowCS, uint64_t ValueCutoff,
+    bool OnlyListBelow, const std::string &ShowFunction, bool TextFormat,
+    bool ShowBinaryIds, bool ShowCovered, bool ShowProfileVersion,
+    bool ShowTemporalProfTraces, ShowFormat SFormat, raw_fd_ostream &OS) {
   if (SFormat == ShowFormat::Json)
     exitWithError("JSON output is not supported for instr profiles");
   if (SFormat == ShowFormat::Yaml)
@@ -2554,6 +2561,13 @@ static int showInstrProfile(
         OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
            << "\n";
 
+      if (ShowVTables) {
+        OS << "    VTable Results:\n";
+        traverseAllValueSites(Func, IPVK_VTableTarget,
+                              VPStats[IPVK_VTableTarget], OS,
+                              &(Reader->getSymtab()));
+      }
+
       if (ShowCounts) {
         OS << "    Block counts: [";
         size_t Start = (IsIRInstr ? 0 : 1);
@@ -2934,6 +2948,9 @@ static int show_main(int argc, const char *argv[]) {
   cl::opt<bool> ShowIndirectCallTargets(
       "ic-targets", cl::init(false),
       cl::desc("Show indirect call site target values for shown functions"));
+  cl::opt<bool> ShowVTables(
+      "show-vtables", cl::init(false),
+      cl::desc("Show virtual table target values for shown functions"));
   cl::opt<bool> ShowMemOPSizes(
       "memop-sizes", cl::init(false),
       cl::desc("Show the profiled sizes of the memory intrinsic calls "
@@ -3036,10 +3053,10 @@ static int show_main(int argc, const char *argv[]) {
   if (ProfileKind == instr)
     return showInstrProfile(
         Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
-        ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
-        ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
-        TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion,
-        ShowTemporalProfTraces, SFormat, OS);
+        ShowMemOPSizes, ShowVTables, ShowDetailedSummary,
+        DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff,
+        OnlyListBelow, ShowFunction, TextFormat, ShowBinaryIds, ShowCovered,
+        ShowProfileVersion, ShowTemporalProfTraces, SFormat, OS);
   if (ProfileKind == sample)
     return showSampleProfile(Filename, ShowCounts, TopNFunctions,
                              ShowAllFunctions, ShowDetailedSummary,

>From f80c5721effe2f18a971a87f7fafb26cd495373f Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Fri, 29 Sep 2023 14:42:19 -0700
Subject: [PATCH 2/6] Address feedback on the code. Questions will be followed
 up shortly. - In IndirectCallVisitor, instrument vtables and skip the rest
 (e.g., function pointers, etc). - Add brief comments for helper functions,
 remove debugging logs and rename vtable name section to vtabnames.

---
 compiler-rt/include/profile/InstrProfData.inc |  15 +-
 compiler-rt/lib/profile/InstrProfiling.h      |  10 +-
 .../lib/profile/InstrProfilingBuffer.c        |  11 +-
 compiler-rt/lib/profile/InstrProfilingMerge.c |   4 +-
 .../lib/profile/InstrProfilingPlatformLinux.c |   4 +-
 .../lib/profile/InstrProfilingWriter.c        |  13 +-
 .../profile/instrprof-write-buffer-internal.c |   8 --
 .../llvm/Analysis/IndirectCallVisitor.h       |  51 +++++--
 llvm/include/llvm/ProfileData/InstrProf.h     |  25 ++--
 .../llvm/ProfileData/InstrProfData.inc        |   2 +-
 llvm/lib/ProfileData/InstrProf.cpp            |  16 +--
 llvm/lib/ProfileData/InstrProfWriter.cpp      |  14 +-
 .../indirect_call_profile_funclet.ll          |   4 +
 .../Transforms/PGOProfile/vtable_profile.ll   | 133 +++++++++++++-----
 14 files changed, 208 insertions(+), 102 deletions(-)

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index b49f9d983e0f46d..af2799ed7d1dcd6 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -92,6 +92,9 @@ INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \
 /* INSTR_PROF_DATA end. */
 
 
+/* For a virtual table object, record the name hash to associate profiled addresses
+ * with global variables, and record {starting address, size in bytes} to map a profiled virtual table address (which usually has an offset from the starting address)
+ * back to a virtual table object. */
 #ifndef INSTR_PROF_VTABLE_DATA
 #define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer)
 #else
@@ -192,7 +195,15 @@ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx))
 VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
 /* For memory intrinsic functions size profiling. */
 VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
-VALUE_PROF_KIND(IPVK_VTableTarget, 2, "vtable target")
+/* For virtual table address profiling, the addresses of the virtual table (i.e., the address contained in objects pointing to a virtual table) are
+ * profiled. Note this may not be the address of the per C++ class virtual table object (i.e., there is an offset).
+ *
+ * The profiled addresses are stored in raw profile, together with the following two types of information.
+ * 1. The (beginning and ending) addresses of per C++ class virtual table objects.
+ * 2. The (compressed) virtual table object names.
+ * RawInstrProfReader converts profiled virtual table addresses to virtual table objects' MD5 hash.
+ */
+VALUE_PROF_KIND(IPVK_VTableTarget, 2, "The address of the compatible vtable (i.e., there is an offset from this address to per C++ class virtual table global variable.)")
 /* These two kinds must be the last to be
  * declared. This is to make sure the string
  * array created with the template can be
@@ -708,7 +719,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
-#define INSTR_PROF_VNAME_COMMON __llvm_prf_vnames
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index f3afa694e02c27d..c3aa72e411f7991 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -92,8 +92,8 @@ const __llvm_profile_data *__llvm_profile_begin_data(void);
 const __llvm_profile_data *__llvm_profile_end_data(void);
 const char *__llvm_profile_begin_names(void);
 const char *__llvm_profile_end_names(void);
-const char *__llvm_profile_begin_vnames(void);
-const char *__llvm_profile_end_vnames(void);
+const char *__llvm_profile_begin_vtabnames(void);
+const char *__llvm_profile_end_vtabnames(void);
 char *__llvm_profile_begin_counters(void);
 char *__llvm_profile_end_counters(void);
 ValueProfNode *__llvm_profile_begin_vnodes();
@@ -286,11 +286,13 @@ uint64_t __llvm_profile_get_num_counters(const char *Begin, const char *End);
 /*! \brief Get the size of the profile counters section in bytes. */
 uint64_t __llvm_profile_get_counters_size(const char *Begin, const char *End);
 
+/*! \brief Get the number of virtual table profile data entries */
 uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
                                        const VTableProfData *End);
 
-uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
-                                        const VTableProfData *End);
+/*! \brief Get the size of virtual table profile data in bytes. */
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+                                                const VTableProfData *End);
 
 /* ! \brief Given the sizes of the data and counter information, return the
  * number of padding bytes before and after the counters, and after the names,
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 0c36e40444c7344..ad1643d48908df2 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -49,8 +49,8 @@ uint64_t __llvm_profile_get_size_for_buffer(void) {
   const char *NamesEnd = __llvm_profile_end_names();
   const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
   const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
-  const char *VNamesBegin = __llvm_profile_begin_vnames();
-  const char *VNamesEnd = __llvm_profile_end_vnames();
+  const char *VNamesBegin = __llvm_profile_begin_vtabnames();
+  const char *VNamesEnd = __llvm_profile_end_vtabnames();
 
   return __llvm_profile_get_size_for_buffer_internal(
       DataBegin, DataEnd, CountersBegin, CountersEnd, NamesBegin, NamesEnd,
@@ -78,8 +78,8 @@ uint64_t __llvm_profile_get_num_vtable(const VTableProfData *Begin,
 }
 
 COMPILER_RT_VISIBILITY
-uint64_t __llvm_profile_get_vtable_size(const VTableProfData *Begin,
-                                        const VTableProfData *End) {
+uint64_t __llvm_profile_get_vtable_section_size(const VTableProfData *Begin,
+                                                const VTableProfData *End) {
   return __llvm_profile_get_num_vtable(Begin, End) * sizeof(VTableProfData);
 }
 
@@ -166,7 +166,8 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
   uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
   uint64_t CountersSize =
       __llvm_profile_get_counters_size(CountersBegin, CountersEnd);
-  uint64_t VTableSize = __llvm_profile_get_vtable_size(VTableBegin, VTableEnd);
+  uint64_t VTableSize =
+      __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd);
   uint64_t VNameSize = (VNamesEnd - VNamesBegin) * sizeof(char);
 
   /* Determine how much padding is needed before/after the counters and after
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 2ef6227599ff139..629d502cdde3127 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -125,9 +125,7 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
                    Header->NumCounters * __llvm_profile_counter_entry_size();
   SrcNameStart = SrcCountersEnd;
   // This is to assume counter size is a multiple of 8 bytes.
-  // uint64_t NamesSize = Header->NamesSize;
-  // uint64_t PaddingBytesAfterNames =
-  //    __llvm_profile_get_num_padding_bytes(Header->NamesSize);
+
   // First, skip rather than merge them
   uint64_t VTableSectionSize = Header->NumVTables * sizeof(VTableProfData);
   uint64_t PaddingBytesAfterVTableSection =
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index dc861632271ce79..c681e253bb46a4a 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -76,10 +76,10 @@ COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
 COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
   return &PROF_NAME_STOP;
 }
-COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vnames(void) {
+COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vtabnames(void) {
   return &PROF_VNAME_START;
 }
-COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vnames(void) {
+COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vtabnames(void) {
   return &PROF_VNAME_STOP;
 }
 COMPILER_RT_VISIBILITY VTableProfData *__llvm_profile_begin_vtables(void) {
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index ed5dbdb6ee4383f..b998618aad7896f 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -250,8 +250,8 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
   const char *NamesEnd = __llvm_profile_end_names();
   const VTableProfData *VTableBegin = __llvm_profile_begin_vtables();
   const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
-  const char *VNamesBegin = __llvm_profile_begin_vnames();
-  const char *VNamesEnd = __llvm_profile_end_vnames();
+  const char *VNamesBegin = __llvm_profile_begin_vtabnames();
+  const char *VNamesEnd = __llvm_profile_end_vtabnames();
   return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
                             CountersEnd, VPDataReader, NamesBegin, NamesEnd,
                             VTableBegin, VTableEnd, VNamesBegin, VNamesEnd,
@@ -282,9 +282,8 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   const uint64_t NumVTables =
       __llvm_profile_get_num_vtable(VTableBegin, VTableEnd);
   const uint64_t VTableSectionSize =
-      __llvm_profile_get_vtable_size(VTableBegin, VTableEnd);
-  // Note, in reality, vtable profiling is not supported when DebugInfoCorrelate
-  // is true.
+      __llvm_profile_get_vtable_section_size(VTableBegin, VTableEnd);
+  // Note vtable profiling is not supported when DebugInfoCorrelate is true.
   const uint64_t VNamesSize = DebugInfoCorrelate ? 0 : VNamesEnd - VNamesBegin;
 
   /* Create the header. */
@@ -320,13 +319,9 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
 
   /* Write the profile header. */
   ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1, 0}};
-  // printf("Size of profile header is %d\n",
-  // (int)(sizeof(__llvm_profile_header)));
   if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))
     return -1;
 
-  // printf("Completed profile header\n");
-
   /* Write the binary id lengths and data. */
   int binary_id_size = __llvm_write_binary_ids(Writer);
   if (binary_id_size == -1)
diff --git a/compiler-rt/test/profile/instrprof-write-buffer-internal.c b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
index 97dfff8d7fe71fb..484aa1f4f2d0eaf 100644
--- a/compiler-rt/test/profile/instrprof-write-buffer-internal.c
+++ b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
@@ -48,20 +48,12 @@ int main(int argc, const char *argv[]) {
       __llvm_profile_begin_names(), __llvm_profile_end_names(), NULL, NULL,
       NULL, NULL);
 
-  // printf("buffer size is %lld\n", bufsize);
-  //uint64_t aligned_bufsize = ((bufsize + 32) >> 6) << 6;
-
   char *buf = malloc(bufsize);
   int ret = __llvm_profile_write_buffer_internal(buf,
       __llvm_profile_begin_data(), __llvm_profile_end_data(),
       __llvm_profile_begin_counters(), __llvm_profile_end_counters(),
       __llvm_profile_begin_names(), __llvm_profile_end_names());
 
-  if (ret != 0) {
-    fprintf(stderr, "failed to write buffer");
-    return ret;
-  }
-
   FILE *f = fopen(argv[1], "w");
   fwrite(buf, bufsize, 1, f);
   fclose(f);
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 52d4ff04d3d4ecc..23168b88a6988ce 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -12,6 +12,7 @@
 #ifndef LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
 #define LLVM_ANALYSIS_INDIRECTCALLVISITOR_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/IR/InstVisitor.h"
 #include <vector>
 
@@ -19,24 +20,58 @@ namespace llvm {
 // Visitor class that finds all indirect call.
 struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
   std::vector<CallBase *> IndirectCalls;
-  std::vector<Instruction *> VTableAddrs;
+  SetVector<Instruction *, std::vector<Instruction *>> VTableAddrs;
   PGOIndirectCallVisitor() = default;
 
   void visitCallBase(CallBase &Call) {
-    if (Call.isIndirectCall())
+    const CallInst *CI = dyn_cast<CallInst>(&Call);
+    if (CI && CI->getCalledFunction()) {
+      switch (CI->getCalledFunction()->getIntrinsicID()) {
+      case Intrinsic::type_test:
+      case Intrinsic::public_type_test:
+      case Intrinsic::type_checked_load_relative:
+      case Intrinsic::type_checked_load: {
+        Value *VTablePtr = CI->getArgOperand(0)->stripPointerCasts();
+
+        if (PtrTestedByTypeIntrinsics.count(VTablePtr) == 0) {
+          Instruction *I = dyn_cast_or_null<Instruction>(VTablePtr);
+          // This is the first type intrinsic where VTablePtr is used.
+          // Assert that the VTablePtr is not found as a type profiling
+          // candidate yet. Note nullptr won't be inserted into VTableAddrs in
+          // the first place, so this assertion works even if 'VTablePtr' is not
+          // an instruction.
+          assert(VTableAddrs.count(I) == 0 &&
+                 "Expect type intrinsic to record VTablePtr before virtual "
+                 "functions are loaded to find vtables that should be "
+                 "instrumented");
+
+          PtrTestedByTypeIntrinsics.insert(VTablePtr);
+        }
+      } break;
+      }
+    }
       if (Call.isIndirectCall()) {
         IndirectCalls.push_back(&Call);
-
         LoadInst *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
         if (LI != nullptr) {
-          Value *Ptr = LI->getPointerOperand();
-          Value *VTablePtr = Ptr->stripInBoundsConstantOffsets();
-          if (VTablePtr != nullptr && isa<Instruction>(VTablePtr)) {
-            VTableAddrs.push_back(cast<Instruction>(VTablePtr));
+          Value *MaybeVTablePtr =
+              LI->getPointerOperand()->stripInBoundsConstantOffsets();
+          Instruction *VTableInstr = dyn_cast<Instruction>(MaybeVTablePtr);
+          // If not used by any type intrinsic, this is not a vtable.
+          // Inst visitor should see the very first type intrinsic using a
+          // vtable before the very first virtual function load from this
+          // vtable. This condition is asserted above.
+          if (VTableInstr && PtrTestedByTypeIntrinsics.count(MaybeVTablePtr)) {
+            VTableAddrs.insert(VTableInstr);
           }
         }
       }
   }
+
+private:
+  // Keeps track of the pointers that are tested by llvm type intrinsics for
+  // lookup.
+  SmallPtrSet<Value *, 4> PtrTestedByTypeIntrinsics;
 };
 
 inline std::vector<CallBase *> findIndirectCalls(Function &F) {
@@ -48,7 +83,7 @@ inline std::vector<CallBase *> findIndirectCalls(Function &F) {
 inline std::vector<Instruction *> findVTableAddrs(Function &F) {
   PGOIndirectCallVisitor ICV;
   ICV.visit(F);
-  return ICV.VTableAddrs;
+  return ICV.VTableAddrs.takeVector();
 }
 
 } // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index e5af3767fb746a4..82b7d662cfcb980 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -225,15 +225,19 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                    StringRef FileName = "<unknown>");
 
-/// Given a vector of strings (function PGO names) \c NameStrs, the
+/// Given a vector of strings (names of global variables, currently
+/// function PGO names or C++ virtual table objects) \c NameStrs, the
 /// method generates a combined string \c Result that is ready to be
 /// serialized.  The \c Result string is comprised of three fields:
 /// The first field is the length of the uncompressed strings, and the
 /// the second field is the length of the zlib-compressed string.
 /// Both fields are encoded in ULEB128.  If \c doCompress is false, the
-///  third field is the uncompressed strings; otherwise it is the
+/// third field is the uncompressed strings; otherwise it is the
 /// compressed string. When the string compression is off, the
 /// second field will have value zero.
+/// FIXME: This should be renamed as collectGlobalVariableNameStrings.
+/// This function is used in non-llvm repo, so the refactor of renaming
+/// should go to a separate patch for easier renaming.
 Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
                                 bool doCompression, std::string &Result);
 
@@ -437,9 +441,13 @@ class InstrProfSymtab {
 private:
   StringRef Data;
   uint64_t Address = 0;
-  // Unique name strings.
+  // Unique name strings. Used to ensure entries in MD5NameMap (a vector that's
+  // going to be sorted) have unique MD5 keys in the first place.
   StringSet<> NameTab;
-  // Unique virtual table names.
+  // Records the unique virtual table names. This is used by InstrProfWriter to
+  // write out an on-disk chained hash table of virtual table names.
+  // InstrProfWriter stores per function profile data (keyed by function names)
+  // so it doesn't use a StringSet for function names.
   StringSet<> VTableNames;
   // A map from MD5 keys to function name strings.
   std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
@@ -529,17 +537,16 @@ class InstrProfSymtab {
     if (VTableName.empty())
       return make_error<InstrProfError>(instrprof_error::malformed,
                                         "invalid input: VTableName is empty");
-    // Insert into NameTab.
+    // Insert into NameTab so that MD5NameMap (a vector that is going to be
+    // sorted) won't have duplicated entries in the first place.
     auto Ins = NameTab.insert(VTableName);
 
-    // Insert into VTableNames.
+    // Record VTableName. InstrProfWriter uses this set. The comment on the
+    // VTableNames class member explains why.
     VTableNames.insert(VTableName);
 
     // If this is newly added, update MD5NameMap.
     if (Ins.second) {
-      // printf("VTableName %s\n", VTableName.str().c_str());
-      // printf("AddVTableName hash %"PRIu64" to %s\n",
-      // IndexedInstrProf::ComputeHash(VTableName), Ins.first->getKey());
       MD5NameMap.push_back(std::make_pair(
           IndexedInstrProf::ComputeHash(VTableName), Ins.first->getKey()));
       Sorted = false;
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index af2532ffd4844f4..dfccc25ffa31962 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -707,7 +707,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
    than WIN32 */
 #define INSTR_PROF_DATA_COMMON __llvm_prf_data
 #define INSTR_PROF_NAME_COMMON __llvm_prf_names
-#define INSTR_PROF_VNAME_COMMON __llvm_prf_vnames
+#define INSTR_PROF_VNAME_COMMON __llvm_prf_vtabnames
 #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts
 #define INSTR_PROF_VALS_COMMON __llvm_prf_vals
 #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 62a155b01e313a5..3adf45763e2b994 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -487,20 +487,15 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
 
 uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
   finalizeSymtab();
-  // printf("look up key 0x%llx\n", Address);
-  // for (auto iter = VTableAddrToMD5Map.begin(); iter !=
-  // VTableAddrToMD5Map.end(); iter++) {
-  //   printf("<key, val> is <0x%llx, %"PRIu64"\n", iter->first, iter->second);
-  // }
   auto It =
       partition_point(VTableAddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
         return A.first < Address;
       });
-  // FIXME: Does the raw function pointers point apply here?
+  // The virtual table address collected from value profiler could be defined
+  // in another module that is not instrumented. Force the value to be 0 in
+  // this case.
   if (It != VTableAddrToMD5Map.end()) {
-    // printf("InstrProfSymtab::getVTableHashFromAddress map addr 0x%llx to hash
-    // value %"PRIu64"\n", Address, (uint64_t)It->second);
-    return (uint64_t)It->second;
+    return It->second;
   }
   return 0;
 }
@@ -585,7 +580,6 @@ Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
                            std::string &Result, bool doCompression) {
   std::vector<std::string> VTableNameStrs;
   for (auto *VTable : VTables) {
-    // printf("VTable name %s added\n", VTable->getName().str().c_str());
     VTableNameStrs.push_back(std::string(VTable->getName()));
   }
   return collectPGOFuncNameStrings(
@@ -680,13 +674,11 @@ Error readVTableNames(StringRef NameStrings, InstrProfSymtab &Symtab) {
     }
     NameStrings.split(Names, getInstrProfNameSeparator());
     for (StringRef &Name : Names) {
-      // printf("Read back vtable name %s\n", Name.str().c_str());
       if (Error E = Symtab.addVTableName(Name))
         return E;
     }
 
     P += Dist;
-    // Skip padding?
     while (P < EndP && *P == 0)
       P++;
   }
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 786993153a8b376..1d565eec4351a27 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -634,19 +634,22 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       OS.writeByte(0);
   }
 
-  // if version >= the version with vtable profile metadata
-  // Intentionally put vtable names before temporal profile section.
+  // if version >= the version with vtable profile metadata.
   uint64_t VTableNamesSectionStart = 0;
   if (IndexedInstrProf::ProfVersion::CurrentVersion >= 11) {
     VTableNamesSectionStart = OS.tell();
 
-    // Reserve space for vtable record table offset.
+    // Reserve space for vtable records offset.
     OS.write(0ULL);
 
     OnDiskChainedHashTableGenerator<llvm::InstrProfRecordVTableTrait>
         VTableNamesGenerator;
     for (const auto &kv : VTableNames) {
-      // printf("InstrProfWriter.cpp key is %s\n", kv.getKey().str().c_str());
+      // Use a char '0' as value placeholder, only keys (vtable names)
+      // are used.
+      // FIXME: It might make sense to have an OnDiskChainedHashSetGenerator if
+      // there are more use cases. Use a hash table for now, with one unused
+      // 'char' per entry.
       VTableNamesGenerator.insert(kv.getKey(), '0');
     }
 
@@ -656,9 +659,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
     uint64_t VTableNamesTableOffset =
         VTableNamesGenerator.Emit(OS.OS, *VTableNamesWriter);
 
-    // printf("InstrProfWriter.cpp:VTableNamesSectionStart is %"PRIu64"\n",
-    // VTableNamesSectionStart); printf("\tVTableNamesTableOffset is
-    // %"PRIu64"\n", VTableNamesTableOffset);
     PatchItem PatchItems[] = {
         {VTableNamesSectionStart, &VTableNamesTableOffset, 1}};
     OS.patch(PatchItems, 1);
diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll b/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
index 63173ab59b28ba2..988a30865a7fee6 100644
--- a/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_profile_funclet.ll
@@ -45,6 +45,8 @@ catch.dispatch:                                   ; preds = %entry
 catch:                                            ; preds = %catch.dispatch
   %tmp1 = catchpad within %tmp [ptr null, i32 64, ptr null]
   %vtable = load ptr, ptr %b, align 8
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4base")
+  tail call void @llvm.assume(i1 %0)
   %tmp3 = load ptr, ptr %vtable, align 8
   call void %tmp3(ptr %b) [ "funclet"(token %tmp1) ]
   catchret from %tmp1 to label %try.cont
@@ -64,3 +66,5 @@ try.cont:                                         ; preds = %catch, %entry
 
 declare dso_local void @"?may_throw@@YAXH at Z"(i32)
 declare dso_local i32 @__CxxFrameHandler3(...)
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
index 0c6bd3c064d35fc..059552a7634dc3c 100644
--- a/llvm/test/Transforms/PGOProfile/vtable_profile.ll
+++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
@@ -1,9 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
 ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
 ; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s --check-prefix=LOWER
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+; The test IR is generated based on the following C++ program.
+; class Derived uses multiple inheritance so its virtual table
+; global variable contains two vtables. func1 is loaded from
+; the vtable compatible with class Base1, and func2 is loaded
+; from the vtable compatible with class Base2.
+; class Base1 {
+; public:
+;    virtual int func1(int a, int b) ;
+; };
+;
+; class Base2 {
+; public:
+;    virtual int func2(int a, int b);
+; };
+;
+; class Derived : public Base1, public Base2 {
+; public:
+;    Derived(int c) : v(c) {}
+; private:
+;    int v;
+; };
+;
+; Derived* createType(int c);
+; int func(int a, int b, int c) {
+;    Derived* d = createType(c);
+;
+;    return d->func2(a, b) + d->func1(b, a);
+; }
+
 $_ZTV7Derived = comdat any
 
 @_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI7Derived, ptr @_ZN5Base15func1Eii], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI7Derived, ptr @_ZN5Base25func2Eii] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5, !type !6, !type !7, !type !8
@@ -19,25 +49,44 @@ $_ZTV7Derived = comdat any
 declare ptr @_Z10createTypei(i32)
 declare i32 @_ZN5Base15func1Eii(ptr, i32, i32)
 declare i32 @_ZN5Base25func2Eii(ptr, i32, i32)
+declare i1 @llvm.public.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
 
-; GEN: @__llvm_profile_raw_version = hidden constant i64 72057594037927945, comdat
-; GEN: @__profn_test_vtable_value_profiling = private constant [27 x i8] c"test_vtable_value_profiling"
-
-; LOWER: $__profvt__ZTV7Derived = comdat any
-; LOWER: $__profvt__ZTV5Base1 = comdat nodeduplicate
-; LOWER: $__profvt__ZTV5Base2 = comdat nodeduplicate
-; LOWER: @__llvm_profile_raw_version = hidden constant i64 72057594037927945, comdat
-; LOWER: @__profc_test_vtable_value_profiling = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
-; LOWER: @__profvp_test_vtable_value_profiling = private global [4 x i64] zeroinitializer, section "__llvm_prf_vals", comdat($__profc_test_vtable_value_profiling), align 8
-; LOWER: @__profd_test_vtable_value_profiling = private global { i64, i64, i64, ptr, ptr, i32, [3 x i16] } { i64 1593873508557585901, i64 567090795815895039, i64 sub (i64 ptrtoint (ptr @__profc_test_vtable_value_profiling to i64), i64 ptrtoint (ptr @__profd_test_vtable_value_profiling to i64)), ptr @test_vtable_value_profiling.local, ptr @__profvp_test_vtable_value_profiling, i32 1, [3 x i16] [i16 2, i16 0, i16 2] }, section "__llvm_prf_data", comdat($__profc_test_vtable_value_profiling), align 8
-; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8
-; LOWER: @__profvt__ZTV5Base1 = global { i64, ptr, i32 } { i64 3215870116411581797, ptr @_ZTV5Base1, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
-; LOWER: @__profvt__ZTV5Base2 = global { i64, ptr, i32 } { i64 8378219803387680050, ptr @_ZTV5Base2, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
-; LOWER: @__llvm_prf_vnodes = private global [10 x { i64, i64, ptr }] zeroinitializer, section "__llvm_prf_vnds", align 8
-; LOWER: @__llvm_prf_nm = private constant [37 x i8] c"\1B#x\DA+I-.\89/+IL\CAI\8D/K\CC)M\8D/(\CAO\CB\CC\C9\CCK\07\00\9Ea\0BC", section "__llvm_prf_names", align 1
-; LOWER: @__llvm_prf_vnm = private constant [34 x i8] c"\22 x\DA\8B\8F\0A\093wI-\CA,KMa\8C\07rL\9D\12\8BS\0D\11L#\00\C3\A2\0A\E9", section "__llvm_prf_vnames", align 1
-; LOWER: @llvm.used = appending global [6 x ptr] [ptr @__profvt__ZTV7Derived, ptr @__profvt__ZTV5Base1, ptr @__profvt__ZTV5Base2, ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata"
-
+;.
+; GEN: @[[_ZTV7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI7Derived, ptr @_ZN5Base15func1Eii], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI7Derived, ptr @_ZN5Base25func2Eii] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5, !type !6, !type !7, !type !8
+; GEN: @[[_ZTVN10__CXXABIV121__VMI_CLASS_TYPE_INFOE:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x ptr]
+; GEN: @[[_ZTS7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant [9 x i8] c"7Derived\00", align 1
+; GEN: @[[_ZTI5BASE1:[a-zA-Z0-9_$"\\.-]+]] = external constant ptr
+; GEN: @[[_ZTI5BASE2:[a-zA-Z0-9_$"\\.-]+]] = external constant ptr
+; GEN: @[[_ZTI7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), ptr @_ZTS7Derived, i32 0, i32 2, ptr @_ZTI5Base1, i64 2, ptr @_ZTI5Base2, i64 2050 }, align 8
+; GEN: @[[_ZTV5BASE1:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base1, ptr @_ZN5Base15func1Eii] }, align 8, !type !0, !type !1
+; GEN: @[[_ZTV5BASE2:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base2, ptr @_ZN5Base25func2Eii] }, align 8, !type !9, !type !4
+; GEN: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @_ZTV5Base1, ptr @_ZTV5Base2], section "llvm.metadata"
+; GEN: @[[__LLVM_PROFILE_RAW_VERSION:[a-zA-Z0-9_$"\\.-]+]] = hidden constant i64 72057594037927945, comdat
+; GEN: @[[__PROFN_TEST_VTABLE_VALUE_PROFILING:[a-zA-Z0-9_$"\\.-]+]] = private constant [27 x i8] c"test_vtable_value_profiling"
+;.
+; LOWER: @[[_ZTV7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI7Derived, ptr @_ZN5Base15func1Eii], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI7Derived, ptr @_ZN5Base25func2Eii] }, comdat, align 8, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5, !type !6, !type !7, !type !8
+; LOWER: @[[_ZTVN10__CXXABIV121__VMI_CLASS_TYPE_INFOE:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x ptr]
+; LOWER: @[[_ZTS7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant [9 x i8] c"7Derived\00", align 1
+; LOWER: @[[_ZTI5BASE1:[a-zA-Z0-9_$"\\.-]+]] = external constant ptr
+; LOWER: @[[_ZTI5BASE2:[a-zA-Z0-9_$"\\.-]+]] = external constant ptr
+; LOWER: @[[_ZTI7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2), ptr @_ZTS7Derived, i32 0, i32 2, ptr @_ZTI5Base1, i64 2, ptr @_ZTI5Base2, i64 2050 }, align 8
+; LOWER: @[[_ZTV5BASE1:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base1, ptr @_ZN5Base15func1Eii] }, align 8, !type !0, !type !1
+; LOWER: @[[_ZTV5BASE2:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI5Base2, ptr @_ZN5Base25func2Eii] }, align 8, !type !9, !type !4
+; LOWER: @[[__LLVM_PROFILE_RAW_VERSION:[a-zA-Z0-9_$"\\.-]+]] = hidden constant i64 72057594037927945, comdat
+; LOWER: @[[__PROFC_TEST_VTABLE_VALUE_PROFILING:[a-zA-Z0-9_$"\\.-]+]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
+; LOWER: @[[__PROFVP_TEST_VTABLE_VALUE_PROFILING:[a-zA-Z0-9_$"\\.-]+]] = private global [4 x i64] zeroinitializer, section "__llvm_prf_vals", comdat($__profc_test_vtable_value_profiling), align 8
+; LOWER: @[[__PROFD_TEST_VTABLE_VALUE_PROFILING:[a-zA-Z0-9_$"\\.-]+]] = private global { i64, i64, i64, ptr, ptr, i32, [3 x i16] } { i64 1593873508557585901, i64 567090795815895039, i64 sub (i64 ptrtoint (ptr @__profc_test_vtable_value_profiling to i64), i64 ptrtoint (ptr @__profd_test_vtable_value_profiling to i64)), ptr @test_vtable_value_profiling.local, ptr @__profvp_test_vtable_value_profiling, i32 1, [3 x i16] [i16 2, i16 0, i16 2] }, section "__llvm_prf_data", comdat($__profc_test_vtable_value_profiling), align 8
+; LOWER: @[[__PROFVT__ZTV7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @[[__PROFVT__ZTV5BASE1:[a-zA-Z0-9_$"\\.-]+]] = global { i64, ptr, i32 } { i64 3215870116411581797, ptr @_ZTV5Base1, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @[[__PROFVT__ZTV5BASE2:[a-zA-Z0-9_$"\\.-]+]] = global { i64, ptr, i32 } { i64 8378219803387680050, ptr @_ZTV5Base2, i32 24 }, section "__llvm_prf_vtab", comdat, align 8
+; LOWER: @[[__LLVM_PRF_VNODES:[a-zA-Z0-9_$"\\.-]+]] = private global [10 x { i64, i64, ptr }] zeroinitializer, section "__llvm_prf_vnds", align 8
+; LOWER: @[[__LLVM_PRF_NM:[a-zA-Z0-9_$"\\.-]+]] = private constant [37 x i8] c"\1B#x\DA+I-.\89/+IL\CAI\8D/K\CC)M\8D/(\CAO\CB\CC\C9\CCK\07\00\9Ea\0BC", section "__llvm_prf_names", align 1
+; LOWER: @[[__LLVM_PRF_VNM:[a-zA-Z0-9_$"\\.-]+]] = private constant [34 x i8] c"\22 x\DA\8B\8F\0A\093wI-\CA,KMa\8C\07rL\9D\12\8BS\0D\11L#\00\C3\A2\0A\E9", section "__llvm_prf_vtabnames", align 1
+; LOWER: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @_ZTV5Base1, ptr @_ZTV5Base2, ptr @__profd_test_vtable_value_profiling], section "llvm.metadata"
+; LOWER: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__profvt__ZTV7Derived, ptr @__profvt__ZTV5Base1, ptr @__profvt__ZTV5Base2, ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata"
+; LOWER: @[[TEST_VTABLE_VALUE_PROFILING_LOCAL:[a-zA-Z0-9_$"\\.-]+]] = private alias i32 (i32, i32, i32), ptr @test_vtable_value_profiling
+;.
 define i32 @test_vtable_value_profiling(i32 %a, i32 %b, i32 %c) {
 ; GEN-LABEL: define i32 @test_vtable_value_profiling(
 ; GEN-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
@@ -48,16 +97,20 @@ define i32 @test_vtable_value_profiling(i32 %a, i32 %b, i32 %c) {
 ; GEN-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[ADD_PTR]], align 8
 ; GEN-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
 ; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP0]], i32 2, i32 0)
+; GEN-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.public.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base2")
+; GEN-NEXT:    tail call void @llvm.assume(i1 [[TMP1]])
 ; GEN-NEXT:    [[VFUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8
-; GEN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[VFUNC]] to i64
-; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP1]], i32 0, i32 0)
+; GEN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[VFUNC]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP2]], i32 0, i32 0)
 ; GEN-NEXT:    [[CALL1:%.*]] = tail call i32 [[VFUNC]](ptr [[ADD_PTR]], i32 [[A]], i32 [[B]])
 ; GEN-NEXT:    [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8
-; GEN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
-; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP2]], i32 2, i32 1)
+; GEN-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP3]], i32 2, i32 1)
+; GEN-NEXT:    [[TMP4:%.*]] = tail call i1 @llvm.public.type.test(ptr [[VTABLE2]], metadata !"_ZTS5Base1")
+; GEN-NEXT:    tail call void @llvm.assume(i1 [[TMP4]])
 ; GEN-NEXT:    [[VFUNC2:%.*]] = load ptr, ptr [[VTABLE2]], align 8
-; GEN-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
-; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP3]], i32 0, i32 1)
+; GEN-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
+; GEN-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_test_vtable_value_profiling, i64 567090795815895039, i64 [[TMP5]], i32 0, i32 1)
 ; GEN-NEXT:    [[CALL4:%.*]] = tail call i32 [[VFUNC2]](ptr [[CALL]], i32 [[B]], i32 [[A]])
 ; GEN-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL4]], [[CALL1]]
 ; GEN-NEXT:    ret i32 [[ADD]]
@@ -73,27 +126,39 @@ define i32 @test_vtable_value_profiling(i32 %a, i32 %b, i32 %c) {
 ; LOWER-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[ADD_PTR]], align 8
 ; LOWER-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[VTABLE]] to i64
 ; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP1]], ptr @__profd_test_vtable_value_profiling, i32 2)
+; LOWER-NEXT:    [[TMP2:%.*]] = tail call i1 @llvm.public.type.test(ptr [[VTABLE]], metadata !"_ZTS5Base2")
+; LOWER-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
 ; LOWER-NEXT:    [[VFUNC:%.*]] = load ptr, ptr [[VTABLE]], align 8
-; LOWER-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[VFUNC]] to i64
-; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP2]], ptr @__profd_test_vtable_value_profiling, i32 0)
+; LOWER-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VFUNC]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP3]], ptr @__profd_test_vtable_value_profiling, i32 0)
 ; LOWER-NEXT:    [[CALL1:%.*]] = tail call i32 [[VFUNC]](ptr [[ADD_PTR]], i32 [[A]], i32 [[B]])
 ; LOWER-NEXT:    [[VTABLE2:%.*]] = load ptr, ptr [[CALL]], align 8
-; LOWER-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
-; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP3]], ptr @__profd_test_vtable_value_profiling, i32 3)
+; LOWER-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VTABLE2]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP4]], ptr @__profd_test_vtable_value_profiling, i32 3)
+; LOWER-NEXT:    [[TMP5:%.*]] = tail call i1 @llvm.public.type.test(ptr [[VTABLE2]], metadata !"_ZTS5Base1")
+; LOWER-NEXT:    tail call void @llvm.assume(i1 [[TMP5]])
 ; LOWER-NEXT:    [[VFUNC2:%.*]] = load ptr, ptr [[VTABLE2]], align 8
-; LOWER-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
-; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP4]], ptr @__profd_test_vtable_value_profiling, i32 1)
+; LOWER-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[VFUNC2]] to i64
+; LOWER-NEXT:    call void @__llvm_profile_instrument_target(i64 [[TMP6]], ptr @__profd_test_vtable_value_profiling, i32 1)
 ; LOWER-NEXT:    [[CALL4:%.*]] = tail call i32 [[VFUNC2]](ptr [[CALL]], i32 [[B]], i32 [[A]])
 ; LOWER-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL4]], [[CALL1]]
 ; LOWER-NEXT:    ret i32 [[ADD]]
 ;
 entry:
   %call = tail call ptr @_Z10createTypei(i32 %c)
+  ; The first indirect call is 'func2' so func ptr is
+  ; loaded from vtable compatible with Base2.
   %add.ptr = getelementptr inbounds i8, ptr %call, i64 8
   %vtable = load ptr, ptr %add.ptr, align 8
+  %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS5Base2")
+  tail call void @llvm.assume(i1 %0)
   %vfunc = load ptr, ptr %vtable, align 8
   %call1 = tail call i32 %vfunc(ptr %add.ptr, i32 %a, i32 %b)
+  ; The second indirect call is 'func1' so func ptr is
+  ; loaded from vtable compatible with Base1.
   %vtable2 = load ptr, ptr %call, align 8
+  %1 = tail call i1 @llvm.public.type.test(ptr %vtable2, metadata !"_ZTS5Base1")
+  tail call void @llvm.assume(i1 %1)
   %vfunc2 = load ptr, ptr %vtable2, align 8
   %call4 = tail call i32 %vfunc2(ptr %call, i32 %b, i32 %a)
   %add = add nsw i32 %call4, %call1
@@ -111,9 +176,13 @@ entry:
 !8 = !{i64 40, !"_ZTSM7DerivedFiiiE.virtual"}
 !9 = !{i64 16, !"_ZTS5Base2"}
 ;.
-; GEN: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; GEN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; GEN: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; GEN: attributes #[[ATTR2:[0-9]+]] = { nounwind }
 ;.
-; LOWER: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; LOWER: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; LOWER: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; LOWER: attributes #[[ATTR2:[0-9]+]] = { nounwind }
 ;.
 ; GEN: [[META0:![0-9]+]] = !{i64 16, !"_ZTS5Base1"}
 ; GEN: [[META1:![0-9]+]] = !{i64 16, !"_ZTSM5Base1FiiiE.virtual"}

>From 67bda225d05528a568c9b2ebb18a003999cd416a Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Mon, 2 Oct 2023 13:46:09 -0700
Subject: [PATCH 3/6] In InstrProf.cpp, add option
 -enable-vtable-value-profiling to flag control vtable instrumentation, and
 record address range of vtables (in VTableAddrRangeToMD5Map). - The new option
 is used in PGOInstrumentation.cpp and   InstrProfiling.cpp to flag-control
 the instrumentation of static and   runtime information. Static information
 includes vtable address range   and name md5 hash, runtime information are
 types of a vtable value. - Before this commit, VTableAddrToMD5Map records
 start and end of vtable   address individually. After this commit, the map
 records a range to   return MD5 iff the address is within the range. This is
 more accurate   when runtime address is collected in one module but static
 vtable   information is not recorded.

---
 .../llvm/Analysis/IndirectCallVisitor.h       | 43 ++++++++++++-------
 llvm/include/llvm/ProfileData/InstrProf.h     | 12 +++---
 llvm/lib/ProfileData/InstrProf.cpp            | 23 +++++++---
 .../Instrumentation/InstrProfiling.cpp        | 16 ++++---
 .../Instrumentation/PGOInstrumentation.cpp    |  8 +++-
 .../Inputs/update_vtable_value_prof_inputs.sh |  5 +--
 6 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 23168b88a6988ce..347f72bc625cb85 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -17,15 +17,20 @@
 #include <vector>
 
 namespace llvm {
-// Visitor class that finds all indirect call.
+// Visitor class that finds indirect calls or instructions that give vtable
+// value, depending on Type.
 struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
+  enum class InstructionType {
+    kIndirectCall = 0,
+    kVTableVal = 1,
+  };
   std::vector<CallBase *> IndirectCalls;
   SetVector<Instruction *, std::vector<Instruction *>> VTableAddrs;
-  PGOIndirectCallVisitor() = default;
+  PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {}
 
   void visitCallBase(CallBase &Call) {
     const CallInst *CI = dyn_cast<CallInst>(&Call);
-    if (CI && CI->getCalledFunction()) {
+    if (Type == InstructionType::kVTableVal && CI && CI->getCalledFunction()) {
       switch (CI->getCalledFunction()->getIntrinsicID()) {
       case Intrinsic::type_test:
       case Intrinsic::public_type_test:
@@ -52,17 +57,20 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
     }
       if (Call.isIndirectCall()) {
         IndirectCalls.push_back(&Call);
-        LoadInst *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
-        if (LI != nullptr) {
-          Value *MaybeVTablePtr =
-              LI->getPointerOperand()->stripInBoundsConstantOffsets();
-          Instruction *VTableInstr = dyn_cast<Instruction>(MaybeVTablePtr);
-          // If not used by any type intrinsic, this is not a vtable.
-          // Inst visitor should see the very first type intrinsic using a
-          // vtable before the very first virtual function load from this
-          // vtable. This condition is asserted above.
-          if (VTableInstr && PtrTestedByTypeIntrinsics.count(MaybeVTablePtr)) {
-            VTableAddrs.insert(VTableInstr);
+        if (Type == InstructionType::kVTableVal) {
+          LoadInst *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
+          if (LI != nullptr) {
+            Value *MaybeVTablePtr =
+                LI->getPointerOperand()->stripInBoundsConstantOffsets();
+            Instruction *VTableInstr = dyn_cast<Instruction>(MaybeVTablePtr);
+            // If not used by any type intrinsic, this is not a vtable.
+            // Inst visitor should see the very first type intrinsic using a
+            // vtable before the very first virtual function load from this
+            // vtable. This condition is asserted above.
+            if (VTableInstr &&
+                PtrTestedByTypeIntrinsics.count(MaybeVTablePtr)) {
+              VTableAddrs.insert(VTableInstr);
+            }
           }
         }
       }
@@ -72,16 +80,19 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
   // Keeps track of the pointers that are tested by llvm type intrinsics for
   // look up.
   SmallPtrSet<Value *, 4> PtrTestedByTypeIntrinsics;
+  InstructionType Type;
 };
 
 inline std::vector<CallBase *> findIndirectCalls(Function &F) {
-  PGOIndirectCallVisitor ICV;
+  PGOIndirectCallVisitor ICV(
+      PGOIndirectCallVisitor::InstructionType::kIndirectCall);
   ICV.visit(F);
   return ICV.IndirectCalls;
 }
 
 inline std::vector<Instruction *> findVTableAddrs(Function &F) {
-  PGOIndirectCallVisitor ICV;
+  PGOIndirectCallVisitor ICV(
+      PGOIndirectCallVisitor::InstructionType::kVTableVal);
   ICV.visit(F);
   return ICV.VTableAddrs.takeVector();
 }
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 82b7d662cfcb980..96fb654860ec629 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -437,6 +437,8 @@ uint64_t ComputeHash(StringRef K);
 class InstrProfSymtab {
 public:
   using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
+  using RangeHashMap =
+      std::vector<std::pair<std::pair<uint64_t, uint64_t>, uint64_t>>;
 
 private:
   StringRef Data;
@@ -464,7 +466,7 @@ class InstrProfSymtab {
   // This map is only populated and used by raw instr profile reader.
   // This is a different map from 'AddrToMD5Map' for readability and
   // debuggability.
-  AddrHashMap VTableAddrToMD5Map;
+  RangeHashMap VTableAddrRangeToMD5Map;
   bool Sorted = false;
 
   static StringRef getExternalSymbol() {
@@ -562,11 +564,11 @@ class InstrProfSymtab {
     AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
   }
 
-  // Map the start and end address of a variable to its names' MD5 hash.
-  // This interface is only used by the raw profile header.
+  // Map the address range (i.e., [start_address, end_address]) of a variable to
+  // its name's MD5 hash. This interface is only used by the raw profile reader.
   void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) {
-    VTableAddrToMD5Map.push_back(std::make_pair(StartAddr, MD5Val));
-    VTableAddrToMD5Map.push_back(std::make_pair(EndAddr, MD5Val));
+    VTableAddrRangeToMD5Map.push_back(
+        std::make_pair(std::make_pair(StartAddr, EndAddr), MD5Val));
   }
 
   /// Return a function's hash, or 0, if the function isn't in this SymTab.
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 3adf45763e2b994..b43f4ef20ae2fa3 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -215,6 +215,13 @@ cl::opt<bool> DoInstrProfNameCompression(
     "enable-name-compression",
     cl::desc("Enable name/filename string compression"), cl::init(true));
 
+cl::opt<bool> EnableVTableValueProfiling(
+    "enable-vtable-value-profiling", cl::init(true),
+    cl::desc("If true, the virtual table address will be instrumented to know "
+             "the types of a C++ pointer. The information could be used in "
+             "indirect-call-promotion to do selective vtable-based comparison "
+             "and interprocedural type propagation."));
+
 std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                     Triple::ObjectFormatType OF,
                                     bool AddSegmentInfo) {
@@ -487,16 +494,18 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
 
 uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
   finalizeSymtab();
-  auto It =
-      partition_point(VTableAddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
-        return A.first < Address;
-      });
+  auto It = lower_bound(
+      VTableAddrRangeToMD5Map, Address,
+      [](std::pair<std::pair<uint64_t, uint64_t>, uint64_t> VTableRangeAddr,
+         uint64_t Addr) { return VTableRangeAddr.first.second < Addr; });
+
+  // Returns the MD5 hash if Address is within the address range of an entry.
+  if (It != VTableAddrRangeToMD5Map.end() && It->first.first <= Address) {
+    return It->second;
+  }
   // The virtual table address collected from value profiler could be defined
   // in another module that is not instrumented. Force the value to be 0 in
   // this case.
-  if (It != VTableAddrToMD5Map.end()) {
-    return It->second;
-  }
   return 0;
 }
 
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 9c94df86e4bf923..64de8a5e747026c 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -63,6 +63,10 @@ cl::opt<bool>
     DebugInfoCorrelate("debug-info-correlate",
                        cl::desc("Use debug info to correlate profiles."),
                        cl::init(false));
+
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
 } // namespace llvm
 
 namespace {
@@ -559,10 +563,12 @@ bool InstrProfiling::run(
       static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
   }
 
-  for (GlobalVariable &GV : M.globals()) {
-    // Global variables with type metadata are virtual table variables.
-    if (GV.hasMetadata(LLVMContext::MD_type)) {
-      getOrCreateVTableProfData(&GV);
+  if (EnableVTableValueProfiling) {
+    for (GlobalVariable &GV : M.globals()) {
+      // Global variables with type metadata are virtual table variables.
+      if (GV.hasMetadata(LLVMContext::MD_type)) {
+        getOrCreateVTableProfData(&GV);
+      }
     }
   }
 
@@ -1402,7 +1408,7 @@ void InstrProfiling::emitNameData() {
 }
 
 void InstrProfiling::emitVTableNames() {
-  if (ReferencedVTableNames.empty())
+  if (!EnableVTableValueProfiling || ReferencedVTableNames.empty())
     return;
 
   // Collect VTable
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3b2c78eebf40d9e..810dff726a05975 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -328,6 +328,10 @@ extern cl::opt<PGOViewCountsType> PGOViewCounts;
 extern cl::opt<std::string> ViewBlockFreqFuncName;
 
 extern cl::opt<bool> DebugInfoCorrelate;
+
+// Command line option to enable vtable value profiling. Defined in
+// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
+extern cl::opt<bool> EnableVTableValueProfiling;
 } // namespace llvm
 
 static cl::opt<bool>
@@ -585,7 +589,9 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
       NumOfPGOBB += MST.BBInfos.size();
       ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
-      ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
+      if (EnableVTableValueProfiling) {
+        ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
+      }
     } else {
       NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
       NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
index 1b35ea0303d15d2..5c594e10e7370f0 100644
--- a/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
+++ b/llvm/test/Transforms/PGOProfile/Inputs/update_vtable_value_prof_inputs.sh
@@ -1,12 +1,11 @@
 #!/bin/bash
 
 if [ $# -lt 2 ]; then
-  echo "Path to clang++ and llvm-profdata required!"
-  echo "Usage: update_vtable_value_prof_inputs.sh /path/to/updated/clang++ /path/to/updated/llvm-profdata"
+  echo "Path to clang++ required!"
+  echo "Usage: update_vtable_value_prof_inputs.sh /path/to/updated/clang++"
   exit 1
 else
   CLANG=$1
-  LLVMPROFDATA=$2
 fi
 
 OUTDIR=$(dirname $(realpath -s $0))

>From 6ea4cee3d189fa6a61a8224cc5510302d1c8e914 Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Mon, 2 Oct 2023 16:27:49 -0700
Subject: [PATCH 4/6] A few fixes: 1. Format the code indentation in
 IndirectCallVisitor.h, and add a FIXME    to do more efficient vtable
 instrumentation. 2. In InstrProfSymtab::finalizeSymtab, sort and uniquify   
 VTableAddrRangeToMD5Map (somehow forgot this when preparing this    patch,
 the full prototype includes profile-use did this already.)

---
 .../llvm/Analysis/IndirectCallVisitor.h       | 37 +++++++++++--------
 llvm/include/llvm/ProfileData/InstrProf.h     |  7 ++++
 2 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 347f72bc625cb85..7fe9cba88410113 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -55,25 +55,30 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
       } break;
       }
     }
-      if (Call.isIndirectCall()) {
-        IndirectCalls.push_back(&Call);
-        if (Type == InstructionType::kVTableVal) {
-          LoadInst *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
-          if (LI != nullptr) {
-            Value *MaybeVTablePtr =
-                LI->getPointerOperand()->stripInBoundsConstantOffsets();
-            Instruction *VTableInstr = dyn_cast<Instruction>(MaybeVTablePtr);
-            // If not used by any type intrinsic, this is not a vtable.
-            // Inst visitor should see the very first type intrinsic using a
-            // vtable before the very first virtual function load from this
-            // vtable. This condition is asserted above.
-            if (VTableInstr &&
-                PtrTestedByTypeIntrinsics.count(MaybeVTablePtr)) {
-              VTableAddrs.insert(VTableInstr);
-            }
+    if (Call.isIndirectCall()) {
+      IndirectCalls.push_back(&Call);
+      if (Type == InstructionType::kVTableVal) {
+        // Note without -fstrict-vtable-pointers, vtable pointers of the same
+        // objects are loaded multiple times, and current implementation
+        // instruments each load once.
+        // FIXME: For more efficient instrumentation, analyze load invariant
+        // vtable values (e.g., from the same pointer in C++) and instrument
+        // them once.
+        LoadInst *LI = dyn_cast<LoadInst>(Call.getCalledOperand());
+        if (LI != nullptr) {
+          Value *MaybeVTablePtr =
+              LI->getPointerOperand()->stripInBoundsConstantOffsets();
+          Instruction *VTableInstr = dyn_cast<Instruction>(MaybeVTablePtr);
+          // If not used by any type intrinsic, this is not a vtable.
+          // Inst visitor should see the very first type intrinsic using a
+          // vtable before the very first virtual function load from this
+          // vtable. This condition is asserted above.
+          if (VTableInstr && PtrTestedByTypeIntrinsics.count(MaybeVTablePtr)) {
+            VTableAddrs.insert(VTableInstr);
           }
         }
       }
+    }
   }
 
 private:
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 96fb654860ec629..74f2bb7b66d9c64 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -668,6 +668,13 @@ void InstrProfSymtab::finalizeSymtab() {
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
                      AddrToMD5Map.end());
+  // GlobalVariable address ranges should not overlap, so sorting by either the
+  // beginning address or the end address is fine.
+  llvm::sort(VTableAddrRangeToMD5Map, less_first());
+  // std::unique uses == operator for std::pair.
+  VTableAddrRangeToMD5Map.erase(std::unique(VTableAddrRangeToMD5Map.begin(),
+                                            VTableAddrRangeToMD5Map.end()),
+                                VTableAddrRangeToMD5Map.end());
   Sorted = true;
 }
 

>From 5b1e5b89ca76379ef379359c78236ee891e01bef Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Tue, 3 Oct 2023 22:59:21 -0700
Subject: [PATCH 5/6] resolve feedbacks on code

---
 compiler-rt/lib/profile/InstrProfilingMerge.c              | 6 ++++--
 compiler-rt/lib/profile/InstrProfilingWriter.c             | 6 ------
 compiler-rt/test/profile/instrprof-write-buffer-internal.c | 5 +++++
 llvm/include/llvm/ProfileData/InstrProf.h                  | 5 -----
 llvm/lib/ProfileData/InstrProf.cpp                         | 5 +++--
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp     | 2 --
 6 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 629d502cdde3127..5f67e25b1edc993 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -124,9 +124,11 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
   SrcCountersEnd = SrcCountersStart +
                    Header->NumCounters * __llvm_profile_counter_entry_size();
   SrcNameStart = SrcCountersEnd;
-  // This is to assume counter size is a multiple of 8 bytes.
 
-  // First, skip rather than merge them
+  // Skip the vtable profile data section and the vtable names section during
+  // runtime profile merge. Merging runtime addresses from multiple profiles
+  // requires the same instrumented binary to run with ASLR disabled; in that
+  // set-up these two sections remain unchanged.
   uint64_t VTableSectionSize = Header->NumVTables * sizeof(VTableProfData);
   uint64_t PaddingBytesAfterVTableSection =
       __llvm_profile_get_num_padding_bytes(VTableSectionSize);
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index b998618aad7896f..18affce48153f73 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -327,12 +327,6 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
   if (binary_id_size == -1)
     return -1;
 
-  // Might be needed for debugging. Clean up before commit.
-  // uint64_t VTableProfDataOffset =
-  //    sizeof(__llvm_profile_header) + binary_id_size + DataSectionSize +
-  //    PaddingBytesBeforeCounters + CountersSectionSize +
-  //    PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames;
-
   /* Write the profile data. */
   ProfDataIOVec IOVecData[] = {
       {DebugInfoCorrelate ? NULL : DataBegin, sizeof(uint8_t), DataSectionSize,
diff --git a/compiler-rt/test/profile/instrprof-write-buffer-internal.c b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
index 484aa1f4f2d0eaf..7ac65cdd62c982e 100644
--- a/compiler-rt/test/profile/instrprof-write-buffer-internal.c
+++ b/compiler-rt/test/profile/instrprof-write-buffer-internal.c
@@ -54,6 +54,11 @@ int main(int argc, const char *argv[]) {
       __llvm_profile_begin_counters(), __llvm_profile_end_counters(),
       __llvm_profile_begin_names(), __llvm_profile_end_names());
 
+  if (ret != 0) {
+    fprintf(stderr, "failed to write buffer");
+    return ret;
+  }
+
   FILE *f = fopen(argv[1], "w");
   fwrite(buf, bufsize, 1, f);
   fclose(f);
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 74f2bb7b66d9c64..ce780a34dd876dd 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -591,11 +591,6 @@ class InstrProfSymtab {
   /// will be represented using the same StringRef value.
   inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
 
-  /// Just like getFuncName, except that it will return a non-empty StringRef
-  /// if the function is external to this symbol table. All such cases
-  /// will be represented using the same StringRef value.
-  // inline StringRef getVTableNameOrExternalSymbol(uint64_t VTableMD5Hash);
-
   /// True if Symbol is the value used to represent external symbols.
   static bool isExternalSymbol(const StringRef &Symbol) {
     return Symbol == InstrProfSymtab::getExternalSymbol();
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index b43f4ef20ae2fa3..a9c180d59ea5c70 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -220,7 +220,9 @@ cl::opt<bool> EnableVTableValueProfiling(
     cl::desc("If true, the virtual table address will be instrumented to know "
              "the types of a C++ pointer. The information could be used in "
              "indirect-call-promotion to do selective vtable-based comparison "
-             "and interprocedural type propagation."));
+             "and interprocedural type propagation. Requires type metadata and "
+             "type intrinsics (https://llvm.org/docs/TypeMetadata.html) to use"
+             " this option."));
 
 std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                     Triple::ObjectFormatType OF,
@@ -451,7 +453,6 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     Types.clear();
     G.getMetadata(LLVMContext::MD_type, Types);
     if (!Types.empty()) {
-      // errs() << "Insert " << G.getGUID() << "\t into MD5VTableMap\n";
       MD5VTableMap.emplace_back(G.getGUID(), &G);
     }
   }
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 64de8a5e747026c..c46d4fd0550163b 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1431,8 +1431,6 @@ void InstrProfiling::emitVTableNames() {
   VTableNamesVar->setAlignment(Align(1));
   // Make VTableNames linker retained.
   UsedVars.push_back(VTableNamesVar);
-
-  // FIXME: Why emitNames call erase method?
 }
 
 void InstrProfiling::emitRegistration() {

>From d4dcc8bbd29f73bb162c8051ba4f9e57ae4b6a79 Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Mon, 9 Oct 2023 22:57:20 -0700
Subject: [PATCH 6/6] [type profiling] profile-use

---
 .../llvm/Analysis/IndirectCallVisitor.h       |  15 +
 llvm/include/llvm/Bitcode/LLVMBitCodes.h      |   3 +
 llvm/include/llvm/IR/IRBuilder.h              |  32 +-
 llvm/include/llvm/IR/ModuleSummaryIndex.h     |  58 ++-
 llvm/include/llvm/IR/ModuleSummaryIndexYAML.h |   5 +-
 llvm/include/llvm/ProfileData/InstrProf.h     |   1 +
 .../Transforms/Utils/CallPromotionUtils.h     |  30 +-
 llvm/lib/Analysis/ModuleSummaryAnalysis.cpp   |  45 +-
 llvm/lib/AsmParser/LLParser.cpp               |   3 +-
 llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp   |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  20 +-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  56 ++-
 llvm/lib/IR/AsmWriter.cpp                     |  39 ++
 llvm/lib/ProfileData/InstrProf.cpp            |   5 +-
 llvm/lib/Transforms/IPO/FunctionImport.cpp    |  47 +-
 .../Instrumentation/IndirectCallPromotion.cpp | 435 +++++++++++++++++-
 .../Transforms/Utils/CallPromotionUtils.cpp   | 277 ++++++++++-
 llvm/test/Transforms/PGOProfile/icp_vtable.ll | 340 ++++++++++++++
 .../PGOProfile/icp_vtable_invoke.ll           | 369 +++++++++++++++
 .../PGOProfile/icp_vtable_musttail.ll         | 191 ++++++++
 20 files changed, 1907 insertions(+), 65 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_musttail.ll

diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 7fe9cba88410113..c46b2e861e7efe0 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -81,6 +81,21 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
     }
   }
 
+  // Returns the instruction yielding the (presumed) vtable pointer that the
+  // callee of \p CB is loaded from; nullptr if there is no such instruction.
+  static Instruction *getAnnotatedVTableInstruction(CallBase *CB) {
+    assert(CB != nullptr && "Caller guaranteed");
+    auto *CalleeLoad = dyn_cast<LoadInst>(CB->getCalledOperand());
+    if (CalleeLoad == nullptr)
+      return nullptr;
+    // The load address is expected to be a GEP (or bitcast); stripping the
+    // in-bounds constant offsets recovers the underlying base pointer.
+    Value *BasePtr =
+        CalleeLoad->getPointerOperand()->stripInBoundsConstantOffsets();
+    // Yields nullptr for a null base or one that is not an Instruction.
+    return dyn_cast_or_null<Instruction>(BasePtr);
+  }
+
 private:
   // Keeps track of the pointers that are tested by llvm type intrinsics for
   // look up.
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 52e76356a892e45..a9203562498f14b 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -319,6 +319,9 @@ enum GlobalValueSummarySymtabCodes {
   //  numver x version]
   FS_COMBINED_ALLOC_INFO = 29,
   FS_STACK_IDS = 30,
+
+  // [n x (vtable-guid, compatible-type-guid, offset-from-address-point)]
+  FS_VTABLE_EDGES = 31,
 };
 
 enum MetadataCodes {
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index ef86eefdf33b834..fc4692fbf87a0f5 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1300,10 +1300,13 @@ class IRBuilderBase {
 
 public:
   Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
-                   bool HasNUW = false, bool HasNSW = false) {
-    if (Value *V =
-            Folder.FoldNoWrapBinOp(Instruction::Add, LHS, RHS, HasNUW, HasNSW))
-      return V;
+                   bool HasNUW = false, bool HasNSW = false,
+                   bool AllowFold = true) {
+    if (AllowFold) {
+      if (Value *V = Folder.FoldNoWrapBinOp(Instruction::Add, LHS, RHS, HasNUW,
+                                            HasNSW))
+        return V;
+    }
     return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name, HasNUW,
                                    HasNSW);
   }
@@ -1312,15 +1315,19 @@ class IRBuilderBase {
     return CreateAdd(LHS, RHS, Name, false, true);
   }
 
-  Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
-    return CreateAdd(LHS, RHS, Name, true, false);
+  Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "",
+                      bool AllowFold = true) {
+    return CreateAdd(LHS, RHS, Name, true, false, AllowFold);
   }
 
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
-                   bool HasNUW = false, bool HasNSW = false) {
-    if (Value *V =
-            Folder.FoldNoWrapBinOp(Instruction::Sub, LHS, RHS, HasNUW, HasNSW))
-      return V;
+                   bool HasNUW = false, bool HasNSW = false,
+                   bool AllowFold = true) {
+    if (AllowFold) {
+      if (Value *V = Folder.FoldNoWrapBinOp(Instruction::Sub, LHS, RHS, HasNUW,
+                                            HasNSW))
+        return V;
+    }
     return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name, HasNUW,
                                    HasNSW);
   }
@@ -1329,8 +1336,9 @@ class IRBuilderBase {
     return CreateSub(LHS, RHS, Name, false, true);
   }
 
-  Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
-    return CreateSub(LHS, RHS, Name, true, false);
+  Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "",
+                      bool AllowFold = true) {
+    return CreateSub(LHS, RHS, Name, true, false, AllowFold);
   }
 
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index cd02c71adddfc25..00e34c8e5cd7ad2 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -17,12 +17,14 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Module.h"
@@ -663,6 +665,12 @@ class FunctionSummary : public GlobalValueSummary {
     uint64_t Offset;
   };
 
+  struct VTableTypeAndOffsetInfo {
+    ValueInfo VTableVI;          // Summary-index entry of the vtable.
+    StringRef CompatibleTypeStr; // Type id string; non-owning reference.
+    uint64_t Offset;             // Offset from the vtable address point.
+  };
+
   /// A specification for a virtual function call with all constant integer
   /// arguments. This is used to perform virtual constant propagation on the
   /// summary.
@@ -689,6 +697,8 @@ class FunctionSummary : public GlobalValueSummary {
     /// all constant integer arguments.
     std::vector<ConstVCall> TypeTestAssumeConstVCalls,
         TypeCheckedLoadConstVCalls;
+
+    std::vector<VTableTypeAndOffsetInfo> VTableEdges;
   };
 
   /// Flags specific to function summaries.
@@ -811,6 +821,7 @@ class FunctionSummary : public GlobalValueSummary {
         std::vector<FunctionSummary::VFuncId>(),
         std::vector<FunctionSummary::ConstVCall>(),
         std::vector<FunctionSummary::ConstVCall>(),
+        std::vector<FunctionSummary::VTableTypeAndOffsetInfo>(),
         std::vector<FunctionSummary::ParamAccess>(),
         std::vector<CallsiteInfo>(), std::vector<AllocInfo>());
   }
@@ -868,6 +879,7 @@ class FunctionSummary : public GlobalValueSummary {
                   std::vector<VFuncId> TypeCheckedLoadVCalls,
                   std::vector<ConstVCall> TypeTestAssumeConstVCalls,
                   std::vector<ConstVCall> TypeCheckedLoadConstVCalls,
+                  std::vector<VTableTypeAndOffsetInfo> VTableEdges,
                   std::vector<ParamAccess> Params, CallsitesTy CallsiteList,
                   AllocsTy AllocList)
       : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
@@ -876,11 +888,11 @@ class FunctionSummary : public GlobalValueSummary {
     if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() ||
         !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() ||
         !TypeCheckedLoadConstVCalls.empty())
-      TIdInfo = std::make_unique<TypeIdInfo>(
-          TypeIdInfo{std::move(TypeTests), std::move(TypeTestAssumeVCalls),
-                     std::move(TypeCheckedLoadVCalls),
-                     std::move(TypeTestAssumeConstVCalls),
-                     std::move(TypeCheckedLoadConstVCalls)});
+      TIdInfo = std::make_unique<TypeIdInfo>(TypeIdInfo{
+          std::move(TypeTests), std::move(TypeTestAssumeVCalls),
+          std::move(TypeCheckedLoadVCalls),
+          std::move(TypeTestAssumeConstVCalls),
+          std::move(TypeCheckedLoadConstVCalls), std::move(VTableEdges)});
     if (!Params.empty())
       ParamAccesses = std::make_unique<ParamAccessesTy>(std::move(Params));
     if (!CallsiteList.empty())
@@ -928,6 +940,12 @@ class FunctionSummary : public GlobalValueSummary {
     return {};
   }
 
+  ArrayRef<VTableTypeAndOffsetInfo> vtable_edges() const {
+    if (TIdInfo)
+      return TIdInfo->VTableEdges;
+    return {};
+  }
+
   /// Returns the list of virtual calls made by this function using
   /// llvm.assume(llvm.type.test) intrinsics that do not have all constant
   /// integer arguments.
@@ -1261,6 +1279,36 @@ struct TypeIdOffsetVtableInfo {
 /// to inheritance, which is why this is a vector.
 using TypeIdCompatibleVtableInfo = std::vector<TypeIdOffsetVtableInfo>;
 
+// DenseMapInfo specialization: required because VTableTypeAndOffsetInfo is
+// used as the element type of a SetVector. Every hook works field by field.
+template <> struct DenseMapInfo<FunctionSummary::VTableTypeAndOffsetInfo> {
+  using EdgeT = FunctionSummary::VTableTypeAndOffsetInfo;
+
+  static EdgeT getEmptyKey() {
+    return {DenseMapInfo<ValueInfo>::getEmptyKey(),
+            DenseMapInfo<StringRef>::getEmptyKey(),
+            DenseMapInfo<uint64_t>::getEmptyKey()};
+  }
+
+  static EdgeT getTombstoneKey() {
+    return {DenseMapInfo<ValueInfo>::getTombstoneKey(),
+            DenseMapInfo<StringRef>::getTombstoneKey(),
+            DenseMapInfo<uint64_t>::getTombstoneKey()};
+  }
+
+  static unsigned getHashValue(const EdgeT &E) {
+    unsigned Hash = DenseMapInfo<ValueInfo>::getHashValue(E.VTableVI);
+    Hash ^= DenseMapInfo<StringRef>::getHashValue(E.CompatibleTypeStr);
+    return Hash ^ DenseMapInfo<uint64_t>::getHashValue(E.Offset);
+  }
+
+  static bool isEqual(const EdgeT &LHS, const EdgeT &RHS) {
+    return DenseMapInfo<ValueInfo>::isEqual(LHS.VTableVI, RHS.VTableVI) &&
+           LHS.CompatibleTypeStr == RHS.CompatibleTypeStr &&
+           LHS.Offset == RHS.Offset;
+  }
+};
+
 /// Class to hold module path string table and global value map,
 /// and encapsulate methods for operating on them.
 class ModuleSummaryIndex {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 33e57e5f2102fde..c74e8ffa54479e5 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -144,6 +144,7 @@ struct FunctionSummaryYaml {
       TypeCheckedLoadVCalls;
   std::vector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
       TypeCheckedLoadConstVCalls;
+  std::vector<FunctionSummary::VTableTypeAndOffsetInfo> VTableEdges;
 };
 
 } // End yaml namespace
@@ -234,8 +235,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
           std::move(FSum.TypeCheckedLoadVCalls),
           std::move(FSum.TypeTestAssumeConstVCalls),
           std::move(FSum.TypeCheckedLoadConstVCalls),
-          ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
-          ArrayRef<AllocInfo>{}));
+          std::move(FSum.VTableEdges), ArrayRef<FunctionSummary::ParamAccess>{},
+          ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{}));
     }
   }
   static void output(IO &io, GlobalValueSummaryMapTy &V) {
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index ce780a34dd876dd..a4480ea475c185f 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -660,6 +660,7 @@ void InstrProfSymtab::finalizeSymtab() {
     return;
   llvm::sort(MD5NameMap, less_first());
   llvm::sort(MD5FuncMap, less_first());
+  llvm::sort(MD5VTableMap, less_first());
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
                      AddrToMD5Map.end());
diff --git a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
index fcb384ec361339d..73e2f1234577f57 100644
--- a/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
 #define LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H
 
+#include "llvm/IR/GlobalVariable.h"
+
 namespace llvm {
 class CallBase;
 class CastInst;
@@ -21,6 +23,20 @@ class Function;
 class MDNode;
 class Value;
 
+// Per-callsite record describing one vtable candidate.
+struct VTableCandidate {
+  Instruction *VTableInstr; // The instruction that is instrumented.
+  GlobalVariable *VTableVariable;
+  uint64_t AddressPointOffset; // Address point offset.
+  Function *TargetFunction;
+  uint64_t VTableValCount; // Count; used to compute percentage.
+  // \p Offset is 64-bit to match AddressPointOffset (no truncation).
+  VTableCandidate(Instruction *I, GlobalVariable *GV, uint64_t Offset,
+                  Function *F, uint64_t C)
+      : VTableInstr(I), VTableVariable(GV), AddressPointOffset(Offset),
+        TargetFunction(F), VTableValCount(C) {}
+};
+
 /// Return true if the given indirect call site can be made to call \p Callee.
 ///
 /// This function ensures that the number and type of the call site's arguments
@@ -39,7 +55,8 @@ bool isLegalToPromote(const CallBase &CB, Function *Callee,
 /// RetBitCast is non-null, it will be used to store the return value bitcast,
 /// if created.
 CallBase &promoteCall(CallBase &CB, Function *Callee,
-                      CastInst **RetBitCast = nullptr);
+                      CastInst **RetBitCast = nullptr,
+                      bool DirectCalleeAlreadySet = false);
 
 /// Promote the given indirect call site to conditionally call \p Callee.
 ///
@@ -51,6 +68,17 @@ CallBase &promoteCall(CallBase &CB, Function *Callee,
 CallBase &promoteCallWithIfThenElse(CallBase &CB, Function *Callee,
                                     MDNode *BranchWeights = nullptr);
 
+/// Promote the given indirect call to a conditional call.
+/// Before:
+///
+/// After:
+CallBase &promoteIndirectCallWithVTableInfo(
+    CallBase &CB, Function *TargetFunction,
+    const SmallVector<VTableCandidate> &VTableCandidates,
+    const std::vector<int> &VTableIndices,
+    const std::unordered_map<int, Value *> &VTableOffsetToValueMap,
+    uint64_t &SumPromotedVTableCount, MDNode *BranchWeights);
+
 /// Try to promote (devirtualize) a virtual call on an Alloca. Return true on
 /// success.
 ///
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index a88622efa12db8c..a8a16a24cd61a08 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -166,7 +167,7 @@ static void addVCallToSet(
 /// If this intrinsic call requires that we add information to the function
 /// summary, do so via the non-constant reference arguments.
 static void addIntrinsicToSummary(
-    const CallInst *CI,
+    ModuleSummaryIndex &Index, const CallInst *CI,
     SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> &TypeTests,
     SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
         &TypeTestAssumeVCalls,
@@ -178,6 +179,9 @@ static void addIntrinsicToSummary(
     SetVector<FunctionSummary::ConstVCall,
               std::vector<FunctionSummary::ConstVCall>>
         &TypeCheckedLoadConstVCalls,
+    SetVector<FunctionSummary::VTableTypeAndOffsetInfo,
+              std::vector<FunctionSummary::VTableTypeAndOffsetInfo>>
+        &VTableTypeAndOffsetData,
     DominatorTree &DT) {
   switch (CI->getCalledFunction()->getIntrinsicID()) {
   case Intrinsic::type_test:
@@ -201,10 +205,32 @@ static void addIntrinsicToSummary(
     SmallVector<DevirtCallSite, 4> DevirtCalls;
     SmallVector<CallInst *, 4> Assumes;
     findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
-    for (auto &Call : DevirtCalls)
+    for (auto &Call : DevirtCalls) {
       addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
                     TypeTestAssumeConstVCalls);
 
+      Instruction *VTableInstr =
+          PGOIndirectCallVisitor::getAnnotatedVTableInstruction(&Call.CB);
+
+      if (VTableInstr) {
+        std::unique_ptr<InstrProfValueData[]> ValueDataArray =
+            std::make_unique<InstrProfValueData[]>(24);
+
+        uint32_t ActualNumValues = 0;
+        uint64_t TotalCount = 0;
+
+        getValueProfDataFromInst(*VTableInstr, IPVK_VTableTarget, 24,
+                                 ValueDataArray.get(), ActualNumValues,
+                                 TotalCount);
+
+        for (uint32_t j = 0; j < ActualNumValues; j++) {
+          VTableTypeAndOffsetData.insert(
+              {Index.getOrInsertValueInfo(
+                   ValueDataArray[j].Value) /* VTableGUID */,
+               TypeId->getString() /* CompatibleTypeStr */, Call.Offset});
+        }
+      }
+    }
     break;
   }
 
@@ -281,6 +307,9 @@ static void computeFunctionSummary(
       CallGraphEdges;
   SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges,
       StoreRefEdges;
+  SetVector<FunctionSummary::VTableTypeAndOffsetInfo,
+            std::vector<FunctionSummary::VTableTypeAndOffsetInfo>>
+      VTableEdges;
   SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests;
   SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
       TypeTestAssumeVCalls, TypeCheckedLoadVCalls;
@@ -389,9 +418,10 @@ static void computeFunctionSummary(
       // intrinsic, or an indirect call with profile data.
       if (CalledFunction) {
         if (CI && CalledFunction->isIntrinsic()) {
-          addIntrinsicToSummary(
-              CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
-              TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
+          addIntrinsicToSummary(Index, CI, TypeTests, TypeTestAssumeVCalls,
+                                TypeCheckedLoadVCalls,
+                                TypeTestAssumeConstVCalls,
+                                TypeCheckedLoadConstVCalls, VTableEdges, DT);
           continue;
         }
         // We should have named any anonymous globals
@@ -631,8 +661,8 @@ static void computeFunctionSummary(
       CallGraphEdges.takeVector(), TypeTests.takeVector(),
       TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
       TypeTestAssumeConstVCalls.takeVector(),
-      TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses),
-      std::move(Callsites), std::move(Allocs));
+      TypeCheckedLoadConstVCalls.takeVector(), VTableEdges.takeVector(),
+      std::move(ParamAccesses), std::move(Callsites), std::move(Allocs));
   if (NonRenamableLocal)
     CantBePromoted.insert(F.getGUID());
   Index.addGlobalValueSummary(F, std::move(FuncSummary));
@@ -890,6 +920,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
                     ArrayRef<FunctionSummary::VFuncId>{},
                     ArrayRef<FunctionSummary::ConstVCall>{},
                     ArrayRef<FunctionSummary::ConstVCall>{},
+                    ArrayRef<FunctionSummary::VTableTypeAndOffsetInfo>{},
                     ArrayRef<FunctionSummary::ParamAccess>{},
                     ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
             Index.addGlobalValueSummary(*GV, std::move(Summary));
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f1f0cdf746ee12a..24de2b3371da845 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8864,7 +8864,8 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
       std::move(TypeIdInfo.TypeCheckedLoadVCalls),
       std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
       std::move(TypeIdInfo.TypeCheckedLoadConstVCalls),
-      std::move(ParamAccesses), std::move(Callsites), std::move(Allocs));
+      std::move(TypeIdInfo.VTableEdges), std::move(ParamAccesses),
+      std::move(Callsites), std::move(Allocs));
 
   FS->setModulePath(ModulePath);
 
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 7005011980ebc95..37c311f1eadbd14 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -311,6 +311,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
       STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
       STRINGIFY_CODE(FS, VALUE_GUID)
+      STRINGIFY_CODE(FS, VTABLE_EDGES)
       STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
       STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
       STRINGIFY_CODE(FS, TYPE_ID)
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1d1ec988a93d847..2893f9568584abf 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7171,6 +7171,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       PendingTypeCheckedLoadVCalls;
   std::vector<FunctionSummary::ConstVCall> PendingTypeTestAssumeConstVCalls,
       PendingTypeCheckedLoadConstVCalls;
+  std::vector<FunctionSummary::VTableTypeAndOffsetInfo> PendingVTableEdges;
   std::vector<FunctionSummary::ParamAccess> PendingParamAccesses;
 
   std::vector<CallsiteInfo> PendingCallsites;
@@ -7285,8 +7286,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
           std::move(PendingTypeCheckedLoadVCalls),
           std::move(PendingTypeTestAssumeConstVCalls),
           std::move(PendingTypeCheckedLoadConstVCalls),
-          std::move(PendingParamAccesses), std::move(PendingCallsites),
-          std::move(PendingAllocs));
+          std::move(PendingVTableEdges), std::move(PendingParamAccesses),
+          std::move(PendingCallsites), std::move(PendingAllocs));
       FS->setModulePath(getThisModule()->first());
       FS->setOriginalName(std::get<1>(VIAndOriginalGUID));
       TheIndex.addGlobalValueSummary(std::get<0>(VIAndOriginalGUID),
@@ -7429,8 +7430,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
           std::move(PendingTypeCheckedLoadVCalls),
           std::move(PendingTypeTestAssumeConstVCalls),
           std::move(PendingTypeCheckedLoadConstVCalls),
-          std::move(PendingParamAccesses), std::move(PendingCallsites),
-          std::move(PendingAllocs));
+          std::move(PendingVTableEdges), std::move(PendingParamAccesses),
+          std::move(PendingCallsites), std::move(PendingAllocs));
       LastSeenSummary = FS.get();
       LastSeenGUID = VI.getGUID();
       FS->setModulePath(ModuleIdMap[ModuleId]);
@@ -7617,6 +7618,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
       break;
     }
+    case bitc::FS_VTABLE_EDGES: {
+      assert(PendingVTableEdges.empty() && "VTableEdges not read yet");
+      for (unsigned I = 0; I != Record.size(); I += 4) {
+        // Edge: [vtable value id, type Strtab offset, type size, offset]
+        StringRef TypeStr(Strtab.data() + Record[I + 1], Record[I + 2]);
+        PendingVTableEdges.push_back(
+            {std::get<0>(getValueInfoFromValueId(Record[I])), TypeStr,
+             Record[I + 3]});
+      }
+      break;
+    }
 
     case bitc::FS_COMBINED_ALLOC_INFO: {
       unsigned I = 0;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index f53fbd73667762c..98ef2f02295de23 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -198,14 +198,25 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
     for (const auto &GUIDSummaryLists : *Index)
       // Examine all summaries for this GUID.
       for (auto &Summary : GUIDSummaryLists.second.SummaryList)
-        if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+        if (auto FS = dyn_cast<FunctionSummary>(Summary.get())) {
           // For each call in the function summary, see if the call
           // is to a GUID (which means it is for an indirect call,
           // otherwise we would have a Value for it). If so, synthesize
           // a value id.
           for (auto &CallEdge : FS->calls())
-            if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue())
+            if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue()) {
               assignValueId(CallEdge.first.getGUID());
+            }
+
+          for (auto &VTable : FS->vtable_edges()) {
+            ValueInfo VTableVI = VTable.VTableVI;
+            assert(VTableVI && "VTableVI doesn't exist when writing per-module "
+                               "function summary");
+            if (!VTableVI.haveGVs() || !VTableVI.getValue()) {
+              assignValueId(VTableVI.getGUID());
+            }
+          }
+        }
   }
 
 protected:
@@ -3761,8 +3772,9 @@ void IndexBitcodeWriter::writeModStrings() {
 /// a function summary entry (whether per-module or combined).
 template <typename Fn>
 static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
-                                             FunctionSummary *FS,
-                                             Fn GetValueID) {
+                                             FunctionSummary *FS, Fn GetValueID,
+                                             StringTableBuilder &StrtabBuilder,
+                                             bool PerModule) {
   if (!FS->type_tests().empty())
     Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
 
@@ -3982,9 +3994,38 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
   FunctionSummary *FS = cast<FunctionSummary>(Summary);
 
   writeFunctionTypeMetadataRecords(
-      Stream, FS, [&](const ValueInfo &VI) -> std::optional<unsigned> {
+      Stream, FS,
+      [&](const ValueInfo &VI) -> std::optional<unsigned> {
         return {VE.getValueID(VI.getValue())};
-      });
+      },
+      StrtabBuilder, true);
+
+  SmallVector<uint64_t, 64> Record;
+  // The resolved information produced by thin-link will be represented using
+  // different formats.
+  auto WriteVTableEdges =
+      [&](uint64_t Ty,
+          ArrayRef<FunctionSummary::VTableTypeAndOffsetInfo> VTableEdges) {
+        if (VTableEdges.empty())
+          return;
+        Record.clear();
+        for (auto &Edge : VTableEdges) {
+          assert(Edge.VTableVI && "Expect VTableVI for an edge");
+
+          std::optional<unsigned> ValueID = getValueId(Edge.VTableVI);
+
+          assert(ValueID && "Expect ValueID for an VTableEdge");
+
+          Record.push_back(*ValueID);
+          // Record.push_back(Edge.VTableGUID);
+          Record.push_back(StrtabBuilder.add(Edge.CompatibleTypeStr));
+          Record.push_back(Edge.CompatibleTypeStr.size());
+          Record.push_back(Edge.Offset);
+        }
+        Stream.EmitRecord(Ty, Record);
+      };
+
+  WriteVTableEdges(bitc::FS_VTABLE_EDGES, FS->vtable_edges());
 
   writeFunctionHeapProfileRecords(
       Stream, FS, CallsiteAbbrev, AllocAbbrev,
@@ -4448,7 +4489,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     };
 
     auto *FS = cast<FunctionSummary>(S);
-    writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
+    writeFunctionTypeMetadataRecords(Stream, FS, GetValueId, StrtabBuilder,
+                                     false);
     getReferencedTypeIds(FS, ReferencedTypeIds);
 
     writeFunctionHeapProfileRecords(
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index e190d82127908db..a0f6ec1b51513e6 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2626,6 +2626,11 @@ class AssemblyWriter {
   void printWPDRes(const WholeProgramDevirtResolution &WPDRes);
   void printTypeIdInfo(const FunctionSummary::TypeIdInfo &TIDInfo);
   void printVFuncId(const FunctionSummary::VFuncId VFId);
+  void printVTableEdges(
+      const std::vector<FunctionSummary::VTableTypeAndOffsetInfo> &VTableEdges,
+      const char *Tag);
+  void
+  printVTableEdge(const FunctionSummary::VTableTypeAndOffsetInfo &VTableEdge);
   void
   printNonConstVCalls(const std::vector<FunctionSummary::VFuncId> &VCallList,
                       const char *Tag);
@@ -3353,9 +3358,29 @@ void AssemblyWriter::printTypeIdInfo(
     printConstVCalls(TIDInfo.TypeCheckedLoadConstVCalls,
                      "typeCheckedLoadConstVCalls");
   }
+  if (!TIDInfo.VTableEdges.empty()) {
+    Out << TIDFS;
+    printVTableEdges(TIDInfo.VTableEdges, "vtableEdges");
+  }
   Out << ")";
 }
 
+// Prints one vtable edge as
+// "vTableEdge: (vtableGuid: <guid or ^slot>, <type>, <offset>)".
+void AssemblyWriter::printVTableEdge(
+    const FunctionSummary::VTableTypeAndOffsetInfo &VTableEdge) {
+  Out << "vTableEdge: (";
+  const auto VTableGUID = VTableEdge.VTableVI.getGUID();
+  const auto Slot = Machine.getGUIDSlot(VTableGUID);
+  // FIXME: Change this to assert(Slot != -1)
+  if (Slot != -1)
+    Out << "vtableGuid: ^" << Slot;
+  else
+    Out << "vtableGuid: " << VTableGUID;
+  Out << ", " << VTableEdge.CompatibleTypeStr << ", " << VTableEdge.Offset
+      << ")";
+}
+
 void AssemblyWriter::printVFuncId(const FunctionSummary::VFuncId VFId) {
   auto TidIter = TheIndex->typeIds().equal_range(VFId.GUID);
   if (TidIter.first == TidIter.second) {
@@ -3407,6 +3432,20 @@ void AssemblyWriter::printConstVCalls(
   Out << ")";
 }
 
+// Prints "<Tag>: (...)" with each edge parenthesized, FieldSeparator between.
+void AssemblyWriter::printVTableEdges(
+    const std::vector<FunctionSummary::VTableTypeAndOffsetInfo> &VTableEdges,
+    const char *Tag) {
+  Out << Tag << ": (";
+  FieldSeparator Separator;
+  for (const auto &Edge : VTableEdges) {
+    Out << Separator << "(";
+    printVTableEdge(Edge);
+    Out << ")";
+  }
+  Out << ")";
+}
+
 void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
   GlobalValueSummary::GVFlags GVFlags = Summary.flags();
   GlobalValue::LinkageTypes LT = (GlobalValue::LinkageTypes)GVFlags.Linkage;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index a9c180d59ea5c70..78f772f489ff5ee 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -453,7 +453,10 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     Types.clear();
     G.getMetadata(LLVMContext::MD_type, Types);
     if (!Types.empty()) {
-      MD5VTableMap.emplace_back(G.getGUID(), &G);
+      if (Error E = addVTableName(G.getName()))
+        return E;
+
+      MD5VTableMap.emplace_back(GlobalValue::getGUID(G.getName()), &G);
     }
   }
   Sorted = false;
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 80c360b8dd0f74f..bee81033a671885 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -427,9 +427,46 @@ static void computeImportForFunction(
     SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
     FunctionImporter::ImportMapTy &ImportList,
     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
-    FunctionImporter::ImportThresholdsTy &ImportThresholds) {
+    FunctionImporter::ImportThresholdsTy &ImportThresholds, StringRef ModName) {
   GVImporter.onImportingSummary(Summary);
   static int ImportCount = 0;
+  // If there is a local variable, make sure to import the copy in caller's
+  // module.
+  auto LocalNotInModule = [&](const GlobalValueSummary *GVS) -> bool {
+    return GlobalValue::isLocalLinkage(GVS->linkage()) &&
+           GVS->modulePath() != Summary.modulePath();
+  };
+  // When a vtable definition is imported, IR linker should import all the
+  // declarations.
+  if (!Summary.vtable_edges().empty()) {
+    for (const auto &edge : Summary.vtable_edges()) {
+      ValueInfo VTableVI = edge.VTableVI;
+      if (!VTableVI) {
+        continue;
+      }
+
+      // VTable already defined in destination module
+      //
+      // FIXME: Change to shouldImportGlobal
+      if (DefinedGVSummaries.count(VTableVI.getGUID()))
+        continue;
+
+      for (const auto &VTableSummary : VTableVI.getSummaryList()) {
+        const auto *GVS = dyn_cast<GlobalVarSummary>(VTableSummary.get());
+
+        if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
+            LocalNotInModule(GVS))
+          continue;
+
+        // import the declaration from module.
+        ImportList[VTableSummary->modulePath()].insert(VTableVI.getGUID());
+
+        if (ExportLists) {
+          (*ExportLists)[VTableSummary->modulePath()].insert(VTableVI);
+        }
+      }
+    }
+  }
   for (const auto &Edge : Summary.calls()) {
     ValueInfo VI = Edge.first;
     LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
@@ -618,9 +655,9 @@ void ModuleImportsManager::computeImportForModule(
       // Skip import for global variables
       continue;
     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
-    computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
-                             DefinedGVSummaries, IsPrevailing, Worklist, GVI,
-                             ImportList, ExportLists, ImportThresholds);
+    computeImportForFunction(
+        *FuncSummary, Index, ImportInstrLimit, DefinedGVSummaries, IsPrevailing,
+        Worklist, GVI, ImportList, ExportLists, ImportThresholds, ModName);
   }
 
   // Process the newly imported functions and add callees to the worklist.
@@ -632,7 +669,7 @@ void ModuleImportsManager::computeImportForModule(
     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
                                IsPrevailing, Worklist, GVI, ImportList,
-                               ExportLists, ImportThresholds);
+                               ExportLists, ImportThresholds, ModName);
   }
 
   // Print stats about functions considered but rejected for importing
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 5c9799235017a8a..2dd8620444dcf51 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,13 +13,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
 #include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
@@ -39,6 +43,7 @@
 #include <cassert>
 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
@@ -55,6 +60,14 @@ STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
                                 cl::desc("Disable indirect call promotion"));
 
+static cl::opt<bool> EnableVTableProm("enable-vtable-prom", cl::init(false),
+                                      cl::Hidden,
+                                      cl::desc("Enable vtable prom"));
+
+static cl::opt<int>
+    MaxNumAdditionalOffset("max-num-additional-offset", cl::init(0), cl::Hidden,
+                           cl::desc("The max number of additional offset"));
+
 // Set the cutoff value for the promotion. If the value is other than 0, we
 // stop the transformation once the total number of promotions equals the cutoff
 // value.
@@ -102,18 +115,31 @@ static cl::opt<bool>
     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
                  cl::desc("Dump IR after transformation happens"));
 
-namespace {
+// namespace {
 
 // Promote indirect calls to conditional direct calls, keeping track of
 // thresholds.
 class IndirectCallPromoter {
+public:
+  struct VirtualCallInfo {
+    uint64_t
+        Offset; // The byte offset from address point to the virtual function.
+    Instruction *I;              // The vptr instruction
+    StringRef CompatibleTypeStr; // Compatible type str
+    Instruction *TypeTestInstr;  // The type.test intrinsic
+  };
+
 private:
+  // 24 is the maximum number of counters per instrumented value.
+  static constexpr int MaxNumVTableToConsider = 24;
   Function &F;
 
   // Symtab that maps indirect call profile values to function names and
   // defines.
   InstrProfSymtab *const Symtab;
 
+  const DenseMap<const CallBase *, VirtualCallInfo> &CBToVirtualCallInfoMap;
+
   const bool SamplePGO;
 
   OptimizationRemarkEmitter &ORE;
@@ -126,6 +152,38 @@ class IndirectCallPromoter {
     PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
   };
 
+  // A helper function that transforms CB (indirect call) to a conditional call
+  // to TargetFunction.
+  // Inputs:
+  // - VTableIndices collects the indices of elements in VTableCandidates whose
+  // function is the TargetFunction.
+  // - VTableOffsetToValueMap
+  //   The key is address point offset, and value is the offset variable.
+  // Outputs:
+  // - TotalVTableCount is updated to subtract the count of TargetFunction.
+  // - VTablePromotedSet adds TargetFunction into the set.
+  // Returns the promoted direct call instruction.
+  CallBase &promoteIndirectCallBasedOnVTable(
+      CallBase &CB, Function *TargetFunction,
+      const SmallVector<VTableCandidate> &VTableCandidates,
+      const std::vector<int> &VTableIndices,
+      const std::unordered_map<int, Value *> &VTableOffsetToValueMap,
+      uint64_t &TotalVTableCount,
+      SmallPtrSet<Function *, 4> &VTablePromotedSet);
+
+  struct PerFunctionCandidateInfo {
+    std::vector<int> VTableIndices;
+    SetVector<int> Offsets;
+  };
+
+  // Does cost benefit analysis between comparing functions and comparing
+  // vtables. Returns true if comparing vtable is more efficient and false
+  // otherwise.
+  bool shouldCompareVTable(
+      CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+      const SmallVector<VTableCandidate> &VTableCandidates,
+      std::vector<PerFunctionCandidateInfo> &PerFunctionCandiateInfo);
+
   // Check if the indirect-call call site should be promoted. Return the number
   // of promotions. Inst is the candidate indirect call, ValueDataRef
   // contains the array of value profile data for profiled targets,
@@ -135,23 +193,234 @@ class IndirectCallPromoter {
       const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
       uint64_t TotalCount, uint32_t NumCandidates);
 
+  uint32_t promoteIndirectCallsByComparingFunctions(
+      CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+      uint64_t &TotalCount, bool AttachProfToDirectCall,
+      OptimizationRemarkEmitter *ORE);
+
   // Promote a list of targets for one indirect-call callsite. Return
   // the number of promotions.
   uint32_t tryToPromote(CallBase &CB,
                         const std::vector<PromotionCandidate> &Candidates,
-                        uint64_t &TotalCount);
+                        uint64_t &TotalCount,
+                        const SmallVector<VTableCandidate> &VTableCandidates,
+                        uint64_t &TotalVTableCount);
+
+  // For indirect call 'CB', find the list of vtable candidates where callees
+  // are loaded from. Returns false if the callee is not loaded from virtual
+  // tables.
+  bool getVTableCandidates(CallBase *CB,
+                           SmallVector<VTableCandidate> &VTableCandidates,
+                           uint64_t &TotalVTableCount);
 
 public:
-  IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
-                       OptimizationRemarkEmitter &ORE)
-      : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+  IndirectCallPromoter(
+      Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
+      const DenseMap<const CallBase *, VirtualCallInfo> &CBToVirtualCallInfoMap,
+      OptimizationRemarkEmitter &ORE)
+      : F(Func), Symtab(Symtab), CBToVirtualCallInfoMap(CBToVirtualCallInfoMap),
+        SamplePGO(SamplePGO), ORE(ORE) {}
   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
   IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
 
   bool processFunction(ProfileSummaryInfo *PSI);
 };
 
-} // end anonymous namespace
+//} // end anonymous namespace
+
+// Scans the !type nodes in Types for the first one whose type id (operand 1)
+// is an MDString equal to CompatibleType and returns its offset (operand 0).
+// Returns std::nullopt when no node matches.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const SmallVector<MDNode *, 2> &Types,
+                        StringRef CompatibleType) {
+  for (MDNode *TypeNode : Types) {
+    auto *TypeId = dyn_cast<MDString>(TypeNode->getOperand(1).get());
+    // Ignore type ids that are not strings or that name another type.
+    if (!TypeId || TypeId->getString() != CompatibleType)
+      continue;
+
+    // Operand 0 carries the offset as a constant integer.
+    auto *OffsetMD = cast<ConstantAsMetadata>(TypeNode->getOperand(0));
+    return cast<ConstantInt>(OffsetMD->getValue())->getZExtValue();
+  }
+  return std::nullopt;
+}
+
+// Returns the function at Offset in vtable GV (through casts/alias), or null.
+static Function *getFunctionAtVTableOffset(GlobalVariable *GV, uint64_t Offset,
+                                           Module &M) {
+  if (!GV->hasInitializer()) // Declarations have no initializer to inspect.
+    return nullptr;
+  Constant *Ptr = getPointerAtOffset(GV->getInitializer(), Offset, M, GV);
+  if (!Ptr)
+    return nullptr;
+  auto *C = Ptr->stripPointerCasts();
+  if (auto *A = dyn_cast<GlobalAlias>(C))
+    return dyn_cast<Function>(A->getAliasee());
+  return dyn_cast<Function>(C);
+}
+
+// Fills VTableCandidates (sorted by descending count) from CB's vtable value
+// profile. Returns false if CB has no virtual call info or no profile data.
+bool IndirectCallPromoter::getVTableCandidates(
+    CallBase *CB, SmallVector<VTableCandidate> &VTableCandidates,
+    uint64_t &TotalVTableCount) {
+  VTableCandidates.clear();
+  // Bail out if CB doesn't have virtual call info. This is possible, for
+  // example, when the indirect callee is a function pointer.
+  auto VirtualCallInfoIter = CBToVirtualCallInfoMap.find(CB);
+  if (VirtualCallInfoIter == CBToVirtualCallInfoMap.end())
+    return false;
+
+  auto &VirtualCallInfo = VirtualCallInfoIter->second;
+  Instruction *VTablePtr = VirtualCallInfo.I;
+  StringRef CompatibleTypeStr = VirtualCallInfo.CompatibleTypeStr;
+
+  std::unique_ptr<InstrProfValueData[]> VTableArray =
+      std::make_unique<InstrProfValueData[]>(MaxNumVTableToConsider);
+  uint32_t ActualNumValueData = 0;
+  // Read the profiled vtable values and their counts off the vptr instruction.
+  bool Res = getValueProfDataFromInst(*VTablePtr, IPVK_VTableTarget,
+                                      MaxNumVTableToConsider, VTableArray.get(),
+                                      ActualNumValueData, TotalVTableCount);
+  if (!Res || ActualNumValueData == 0)
+    return false;
+
+  SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable.
+  // Compute the functions and counts contributed by each vtable.
+  for (uint32_t j = 0; j < ActualNumValueData; j++) {
+    const uint64_t VTableVal = VTableArray[j].Value;
+    GlobalVariable *VTableVariable = Symtab->getGlobalVariable(VTableVal);
+    if (!VTableVariable) {
+      LLVM_DEBUG(dbgs() << "No vtable definition for " << VTableVal
+                        << " from callsite " << (*CB) << "\n");
+      continue;
+    }
+    // Look up the offset this vtable's type metadata gives CompatibleTypeStr.
+    Types.clear();
+    VTableVariable->getMetadata(LLVMContext::MD_type, Types);
+    std::optional<uint64_t> MaybeOffset =
+        getCompatibleTypeOffset(Types, CompatibleTypeStr);
+    if (!MaybeOffset) {
+      LLVM_DEBUG(dbgs() << "Cannot compute compatible type offset "
+                        << CompatibleTypeStr << "\t" << *VTableVariable
+                        << "\n");
+      continue;
+    }
+    // That offset plus the call's own offset locates the function entry.
+    const uint64_t FuncByteOffset = (*MaybeOffset) + VirtualCallInfo.Offset;
+    Function *Callee = getFunctionAtVTableOffset(VTableVariable, FuncByteOffset,
+                                                 *(F.getParent()));
+    if (!Callee) {
+      LLVM_DEBUG(dbgs() << "Cannot find callee at offset " << FuncByteOffset
+                        << " in vtable " << *VTableVariable << "\n");
+      continue;
+    }
+
+    VTableCandidates.push_back({VTablePtr, VTableVariable, *MaybeOffset, Callee,
+                                VTableArray[j].Count});
+  }
+  // Order candidates from the highest profiled count to the lowest.
+  sort(VTableCandidates.begin(), VTableCandidates.end(),
+       [](const VTableCandidate &LHS, const VTableCandidate &RHS) {
+         return LHS.VTableValCount > RHS.VTableValCount;
+       });
+  // Note: true is returned even when no vtable above produced a candidate.
+  return true;
+}
+
+// Rewrites indirect call CB into a conditional call to TargetFunction; branch
+// weights are scaled from the summed counts of the vtables in VTableIndices.
+CallBase &IndirectCallPromoter::promoteIndirectCallBasedOnVTable(
+    CallBase &CB, Function *TargetFunction,
+    const SmallVector<VTableCandidate> &VTableCandidates,
+    const std::vector<int> &VTableIndices,
+    const std::unordered_map<int /*address-point-offset*/, Value *>
+        &VTableOffsetToValueMap,
+    uint64_t &TotalVTableCount, SmallPtrSet<Function *, 4> &VTablePromotedSet) {
+  uint64_t HitCount = 0;
+  for (int Idx : VTableIndices)
+    HitCount += VTableCandidates[Idx].VTableValCount;
+  const uint64_t MissCount = TotalVTableCount - HitCount;
+  const uint64_t Scale =
+      calculateCountScale(HitCount >= MissCount ? HitCount : MissCount);
+  MDBuilder MDB(CB.getContext());
+  MDNode *BranchWeights = MDB.createBranchWeights(
+      scaleBranchCount(HitCount, Scale), scaleBranchCount(MissCount, Scale));
+  uint64_t PromotedCount = 0;
+  CallBase &NewInst = promoteIndirectCallWithVTableInfo(
+      CB, TargetFunction, VTableCandidates, VTableIndices,
+      VTableOffsetToValueMap, PromotedCount, BranchWeights);
+  TotalVTableCount -= PromotedCount;
+  VTablePromotedSet.insert(TargetFunction);
+  promoteCall(NewInst, TargetFunction, nullptr, true);
+  return NewInst;
+}
+
+// Does cost-benefit analysis between comparing functions and comparing
+// vtables for a callsite.
+//
+// On the vtable-enabled path, PerFunctionCandidateInfo (expected empty on
+// entry) is resized to Candidates.size(); entry i receives the indices into
+// VTableCandidates whose target is Candidates[i].TargetFunction, plus the
+// distinct address-point offsets of those vtables. VTableCandidates that
+// target no candidate function are skipped.
+//
+// Returns true only if vtable promotion is enabled, Candidates is non-empty,
+// every candidate function is reached through exactly one vtable, and all
+// of those vtables share the same address-point offset.
+bool IndirectCallPromoter::shouldCompareVTable(
+    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+    const SmallVector<VTableCandidate> &VTableCandidates,
+    std::vector<PerFunctionCandidateInfo> &PerFunctionCandidateInfo) {
+  // With no candidates the checks below would hold vacuously and this would
+  // return true, yet the caller then reads PerFunctionCandidateInfo[0].
+  if (!EnableVTableProm || Candidates.empty())
+    return false;
+
+  assert(PerFunctionCandidateInfo.empty() &&
+         "Expect empty PerFunctionCandidateInfo");
+  PerFunctionCandidateInfo.resize(Candidates.size());
+
+  // Map each candidate function to its position in Candidates.
+  SmallDenseMap<Function *, int, 4> FunctionToIndexMap;
+  for (int I = 0, E = Candidates.size(); I < E; ++I) {
+    Function *Target = Candidates[I].TargetFunction;
+    assert(FunctionToIndexMap.find(Target) == FunctionToIndexMap.end() &&
+           "Expect unique functions");
+    FunctionToIndexMap[Target] = I;
+  }
+
+  // Attach every vtable candidate, together with its address-point offset,
+  // to the candidate function it targets.
+  for (int I = 0, E = VTableCandidates.size(); I < E; ++I) {
+    const VTableCandidate &VTable = VTableCandidates[I];
+    auto Iter = FunctionToIndexMap.find(VTable.TargetFunction);
+    if (Iter == FunctionToIndexMap.end())
+      continue;
+
+    auto &Info = PerFunctionCandidateInfo[Iter->second];
+    Info.VTableIndices.push_back(I);
+    Info.Offsets.insert(VTable.AddressPointOffset);
+  }
+
+  // The offset shared by all candidates so far; -1 until the first is seen.
+  int Offset = -1;
+  for (const auto &Info : PerFunctionCandidateInfo) {
+    // Each candidate function must come from exactly one vtable.
+    if (Info.VTableIndices.size() != 1 || Info.Offsets.size() != 1)
+      return false;
+
+    // All vtables must use the same address-point offset.
+    if (Offset == -1)
+      Offset = Info.Offsets[0];
+    else if (Offset != Info.Offsets[0])
+      return false;
+  }
+
+  return true;
+}
 
 // Indirect-call promotion heuristic. The direct targets are sorted based on
 // the count. Stop at the first target that is not promoted.
@@ -274,16 +543,16 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
   return NewInst;
 }
 
-// Promote indirect-call to conditional direct-call for one callsite.
-uint32_t IndirectCallPromoter::tryToPromote(
+uint32_t IndirectCallPromoter::promoteIndirectCallsByComparingFunctions(
     CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
-    uint64_t &TotalCount) {
+    uint64_t &TotalCount, bool AttachProfToDirectCall,
+    OptimizationRemarkEmitter *ORE) {
   uint32_t NumPromoted = 0;
 
   for (const auto &C : Candidates) {
     uint64_t Count = C.Count;
-    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
-                             &ORE);
+    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount,
+                             AttachProfToDirectCall, ORE);
     assert(TotalCount >= Count);
     TotalCount -= Count;
     NumOfPGOICallPromotion++;
@@ -292,11 +561,76 @@ uint32_t IndirectCallPromoter::tryToPromote(
   return NumPromoted;
 }
 
+// Promote indirect-call to conditional direct-call for one callsite.
+// Compares vtable addresses when shouldCompareVTable() returns true and
+// function addresses otherwise. Returns the number of promotions.
+uint32_t IndirectCallPromoter::tryToPromote(
+    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t &TotalCount, const SmallVector<VTableCandidate> &VTableCandidates,
+    uint64_t &TotalVTableCount) {
+  LLVM_DEBUG(dbgs() << "Try to promote callsite " << CB << "\n  with "
+                    << Candidates.size() << " function candidates and "
+                    << VTableCandidates.size() << " vtable candidates\n");
+
+  std::vector<PerFunctionCandidateInfo> PerFunctionCandidateInfo;
+
+  const bool compareVTable = shouldCompareVTable(
+      CB, Candidates, VTableCandidates, PerFunctionCandidateInfo);
+
+  if (!compareVTable) {
+    LLVM_DEBUG(dbgs() << "\tCompare functions for callsite " << CB << "\n");
+    return promoteIndirectCallsByComparingFunctions(CB, Candidates, TotalCount,
+                                                    SamplePGO, &ORE);
+  }
+
+  LLVM_DEBUG(dbgs() << "\tCompare virtual table addresses for callsite " << CB
+                    << "\n");
+
+  auto VirtualCallInfoIter = CBToVirtualCallInfoMap.find(&CB);
+  assert(VirtualCallInfoIter != CBToVirtualCallInfoMap.end() &&
+         "Expect each virtual call to have an entry in map");
+
+  // shouldCompareVTable() only accepts candidates sharing one address-point
+  // offset, so one adjusted value, emitted before the type test, serves all.
+  IRBuilder<> Builder(VirtualCallInfoIter->second.TypeTestInstr);
+  Value *CastedVTableInstr = Builder.CreatePtrToInt(
+      VTableCandidates[PerFunctionCandidateInfo[0].VTableIndices[0]]
+          .VTableInstr,
+      Builder.getInt64Ty());
+  // Subtract that address-point offset from the integer-cast vtable value.
+  Value *ValueObject =
+      Builder.CreateNUWSub(CastedVTableInstr,
+                           Builder.getInt64(static_cast<uint64_t>(
+                               PerFunctionCandidateInfo[0].Offsets[0])),
+                           "vtable_object", false /* AllowFold */
+      );
+
+  std::unordered_map<int, Value *> OffsetToValueMap;
+  OffsetToValueMap[PerFunctionCandidateInfo[0].Offsets[0]] = ValueObject;
+
+  SmallPtrSet<Function *, 4> PromotedFunctionSet;
+
+  for (int i = 0, size = Candidates.size(); i < size; i++) {
+    promoteIndirectCallBasedOnVTable(
+        CB, Candidates[i].TargetFunction, VTableCandidates,
+        PerFunctionCandidateInfo[i].VTableIndices, OffsetToValueMap,
+        TotalVTableCount, PromotedFunctionSet);
+  }
+
+  assert(PromotedFunctionSet.size() == Candidates.size() &&
+         "All functions should be promotable if cost-benefit analysis decides "
+         "to compare vtables");
+  // NOTE(review): TotalCount is not reduced on this path -- confirm intended.
+  return Candidates.size();
+}
+
 // Traverse all the indirect-call callsite and get the value profile
 // annotation to perform indirect-call promotion.
 bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
   bool Changed = false;
   ICallPromotionAnalysis ICallAnalysis;
+  SmallVector<VTableCandidate> VTableCandidates;
+
   for (auto *CB : findIndirectCalls(F)) {
     uint32_t NumVals, NumCandidates;
     uint64_t TotalCount;
@@ -305,12 +639,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
     if (!NumCandidates ||
         (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
       continue;
-    auto PromotionCandidates = getPromotionCandidatesForCallSite(
+    auto FunctionCandidates = getPromotionCandidatesForCallSite(
         *CB, ICallProfDataRef, TotalCount, NumCandidates);
-    uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
+
+    uint64_t TotalVTableCount = 0;
+    if (!getVTableCandidates(CB, VTableCandidates, TotalVTableCount)) {
+      VTableCandidates.clear();
+    }
+
+    // get the vtable set for each target value.
+    // for target values with only one vtable, compare vtable.
+    uint32_t NumPromoted = tryToPromote(*CB, FunctionCandidates, TotalCount,
+                                        VTableCandidates, TotalVTableCount);
     if (NumPromoted == 0)
       continue;
 
+    // FIXME: Update vtable prof metadata.
     Changed = true;
     // Adjust the MD.prof metadata. First delete the old one.
     CB->setMetadata(LLVMContext::MD_prof, nullptr);
@@ -324,17 +668,79 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
   return Changed;
 }
 
+// Populates 'CBToVirtualCallInfoMap' by walking every llvm.type.test call in
+// 'M'. Each call site that findDevirtualizableCallsForTypeTest reports for a
+// type test, and that has an annotated vtable instruction, is mapped to its
+// {offset, vtable instruction, type-id string, type-test call}.
+static void buildCBToVirtualCallInfoMap(
+    Module &M, function_ref<DominatorTree &(Function &)> LookupDomTree,
+    DenseMap<const CallBase *, IndirectCallPromoter::VirtualCallInfo>
+        &CBToVirtualCallInfoMap) {
+  Function *TypeTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+  if (!TypeTestFunc || TypeTestFunc->use_empty())
+    return;
+
+  SmallVector<DevirtCallSite, 1> DevirtCalls;
+  SmallVector<CallInst *, 1> Assumes;
+  for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+    auto *CI = dyn_cast<CallInst>(U.getUser());
+    if (!CI)
+      continue;
+
+    // dyn_cast (not cast) so that the null check below is meaningful.
+    auto *TypeMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!TypeMDVal)
+      continue;
+    // Only type ids that are strings are recorded.
+    auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+    if (!CompatibleTypeId)
+      continue;
+    StringRef CompatibleTypeStr = CompatibleTypeId->getString();
+
+    // The scratch vectors are reused across type tests.
+    DevirtCalls.clear();
+    Assumes.clear();
+    auto &DT = LookupDomTree(*CI->getFunction());
+    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+    for (auto &DevirtCall : DevirtCalls) {
+      CallBase &CB = DevirtCall.CB;
+      // Call sites without an annotated vtable instruction are skipped.
+      Instruction *VTableInstr =
+          PGOIndirectCallVisitor::getAnnotatedVTableInstruction(&CB);
+      if (!VTableInstr)
+        continue;
+      CBToVirtualCallInfoMap[&CB] = {DevirtCall.Offset, VTableInstr,
+                                     CompatibleTypeStr, CI};
+    }
+  }
+}
+
 // A wrapper function that does the actual work.
 static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
                                  bool SamplePGO, ModuleAnalysisManager &MAM) {
   if (DisableICP)
     return false;
+
+  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
+    return FAM.getResult<DominatorTreeAnalysis>(F);
+  };
+
   InstrProfSymtab Symtab;
   if (Error E = Symtab.create(M, InLTO)) {
     std::string SymtabFailure = toString(std::move(E));
     M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
     return false;
   }
+
+  // Keys are the indirect calls that call virtual functions; they are a
+  // subset of all indirect calls.
+  DenseMap<const CallBase *, IndirectCallPromoter::VirtualCallInfo>
+      CBToVirtualCallInfoMap;
+
+  buildCBToVirtualCallInfoMap(M, LookupDomTree, CBToVirtualCallInfoMap);
+
   bool Changed = false;
   for (auto &F : M) {
     if (F.isDeclaration() || F.hasOptNone())
@@ -344,7 +750,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
         MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
-    IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
+    IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO,
+                                      CBToVirtualCallInfoMap, ORE);
     bool FuncChanged = CallPromoter.processFunction(PSI);
     if (ICPDUMPAFTER && FuncChanged) {
       LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index e42cdab64446e2b..a1fb9c5889d43c0 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -15,7 +15,9 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
@@ -380,6 +382,265 @@ CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee,
   return *NewInst;
 }
 
+// Returns true if 'I' could be sunk to 'Block': it has users, all in 'Block'.
+static bool shouldSinkInstrToBlock(Instruction &I, BasicBlock *Block) {
+  // Conservatively regard no-use as not sinkable. For instance,
+  // llvm.assume should remain in the original block.
+  if (I.use_empty())
+    return false;
+
+  for (Use &U : I.uses()) {
+    Instruction *User = dyn_cast<Instruction>(U.getUser());
+    // Conservatively reject a non-instruction user or a user outside 'Block'.
+    if (!User || User->getParent() != Block)
+      return false;
+  }
+  return true;
+}
+
+// Sinks the load of the function pointer called by 'CB' (and, when possible,
+// the instruction computing the load address) from 'SrcBlock' into 'DestBlock'.
+// 'SrcBlock' is scanned backwards from the instruction before its terminator;
+// the scan stops at 'VTableInstr'. Nothing is moved if no such load is found.
+static void sinkInstructionsForIndirectCall(BasicBlock *SrcBlock,
+                                            BasicBlock *DestBlock,
+                                            Instruction *VTableInstr,
+                                            const CallBase &CB) {
+  LoadInst *FPtr = nullptr;
+  for (Instruction &Inst :
+       make_range(++SrcBlock->getTerminator()->getReverseIterator(),
+                  SrcBlock->rend())) {
+    if (Inst.isDebugOrPseudoInst())
+      continue;
+
+    // Exit loop if vtable instr is seen.
+    if (&Inst == VTableInstr)
+      break;
+
+    if (!shouldSinkInstrToBlock(Inst, DestBlock))
+      continue;
+
+    // A sinkable load that is the called operand of 'CB' is the load of the
+    // virtual function pointer; all of its users are in 'DestBlock'.
+    auto *LI = dyn_cast<LoadInst>(&Inst);
+    if (LI && LI == CB.getCalledOperand()) {
+      FPtr = LI;
+      break;
+    }
+  }
+
+  if (!FPtr)
+    return;
+
+  FPtr->moveBefore(&*DestBlock->getFirstInsertionPt());
+  // The address is moved after the load so that it lands in front of it.
+  if (auto *AddrInst = dyn_cast<Instruction>(FPtr->getPointerOperand()))
+    if (shouldSinkInstrToBlock(*AddrInst, DestBlock))
+      AddrInst->moveBefore(&*DestBlock->getFirstInsertionPt());
+}
+
+// Or-reduces 'ICmps' to one value: adjacent pairs are combined level by level,
+// and an unpaired trailing value is carried to the next level unchanged.
+static Value *getOrResult(std::vector<Value *> &ICmps, IRBuilder<> &Builder) {
+  assert(!ICmps.empty());
+  std::vector<Value *> Level(ICmps.begin(), ICmps.end());
+  while (Level.size() > 1) {
+    std::vector<Value *> Next;
+    for (size_t Idx = 0, End = Level.size(); Idx < End; Idx += 2)
+      Next.push_back(Idx + 1 == End ? Level[Idx]
+                                    : Builder.CreateOr(Level[Idx],
+                                                       Level[Idx + 1],
+                                                       "vtable-cmp-or"));
+    Level.swap(Next);
+  }
+  return Level.front();
+}
+
+// Promotes indirect call 'CB' to a direct call to 'TargetFunction' guarded by
+// vtable comparisons. For each index in 'VTableIndices', the candidate vtable
+// variable (as i64) is compared for equality with the value that
+// 'VTableOffsetToValueMap' holds for the candidate's address point offset, and
+// the results are or-ed. 'SumPromotedVTableCount' receives the sum of the
+// selected counts. Returns the clone of 'CB' that calls 'TargetFunction'.
+// FIXME: Drop !prof on direct calls, and update !prof on indirect calls.
+// FIXME: Split this function to smaller helper functions and de-dup code with
+// function-based promotion.
+CallBase &llvm::promoteIndirectCallWithVTableInfo(
+    CallBase &CB, Function *TargetFunction,
+    const SmallVector<VTableCandidate> &VTableCandidates,
+    const std::vector<int> &VTableIndices,
+    const std::unordered_map<int, Value *> &VTableOffsetToValueMap,
+    uint64_t &SumPromotedVTableCount, MDNode *BranchWeights) {
+  SumPromotedVTableCount = 0;
+  IRBuilder<> Builder(&CB);
+  CallBase *OrigIndirectCall = &CB;
+  Instruction *VTableInstr = nullptr;
+  std::vector<Value *> ICmps;
+  for (auto Index : VTableIndices) {
+    SumPromotedVTableCount += VTableCandidates[Index].VTableValCount;
+    const auto &VTableCandidate = VTableCandidates[Index];
+    if (VTableInstr == nullptr) {
+      VTableInstr = VTableCandidate.VTableInstr;
+    } else {
+      // The VTableInstr that's being instrumented. It should remain the same
+      // across all candidates.
+      assert(VTableInstr == VTableCandidate.VTableInstr &&
+             "VTableInstr should remain the same across all vtable candidates");
+    }
+    Value *VTableVar = Builder.CreatePtrToInt(VTableCandidate.VTableVariable,
+                                              Builder.getInt64Ty());
+    assert(VTableOffsetToValueMap.find(VTableCandidate.AddressPointOffset) !=
+               VTableOffsetToValueMap.end() &&
+           "Caller should construct the offset-variables");
+    Value *OffsetVar =
+        VTableOffsetToValueMap.at(VTableCandidate.AddressPointOffset);
+    Value *ICmp = Builder.CreateICmpEQ(VTableVar, OffsetVar);
+    ICmps.push_back(ICmp);
+  }
+
+  Value *Cond = getOrResult(ICmps, Builder);
+  // FIXME:
+  // This should be optimized to the comparison with a newly-created (alias of?)
+  // vtable variable (only frequently accessed vtables are created)
+
+  if (CB.isMustTailCall()) {
+    // From:
+    //   bb:
+    //     vptr = load
+    //     func-addr = gep vptr
+    //     funcptr = load
+    //     res = tail call funcptr
+    //     ret res
+    //
+    // To:
+    //   bb:
+    //     vptr = load
+    //     vtable-object = sub vptr, constant-offset
+    //     cond = icmp eq vtable, vtable-object
+    //     br cond true if.then, false if.else
+    //   if.then:
+    //     %res1 = musttail call direct_callee
+    //     ret %res1
+    //   if.else:
+    //     func-addr = gep vptr
+    //     funcptr = load func-addr
+    //     res2 = musttail call funcptr
+    //     ret res2
+    BasicBlock *OrigBlock = CB.getParent();
+    Instruction *ThenTerm =
+        SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+    BasicBlock *ThenBlock = ThenTerm->getParent();
+    BasicBlock *ElseBlock = CB.getParent();
+    ThenBlock->setName("if.then.direct_vtable_targ");
+    ElseBlock->setName("if.else.orig_indirect_call");
+    CallBase *NewInst = cast<CallBase>(CB.clone());
+    NewInst->insertBefore(ThenTerm);
+    NewInst->setCalledOperand(TargetFunction);
+
+    // Sink the instructions that load funcptr to else block if possible.
+    sinkInstructionsForIndirectCall(OrigBlock, ElseBlock, VTableInstr, CB);
+
+    // After this copy the bitcast and ret.
+    Value *NewRetVal = NewInst;
+    auto Next = CB.getNextNode();
+    if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+      assert(BitCast->getOperand(0) == &CB &&
+             "bitcast following musttail call must use the call");
+      auto NewBitCast = BitCast->clone();
+      NewBitCast->replaceUsesOfWith(&CB, NewInst);
+      NewBitCast->insertBefore(ThenTerm);
+      NewRetVal = NewBitCast;
+      Next = BitCast->getNextNode();
+    }
+    // Place a clone of the return instruction after the new call site.
+    ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+    assert(Ret && "musttail call must precede a ret with an optional bitcast");
+    auto NewRet = Ret->clone();
+    if (Ret->getReturnValue()) {
+      NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+    }
+    NewRet->insertBefore(ThenTerm);
+    // A return instruction is terminating, so we don't need the terminating
+    // instruction just created.
+    ThenTerm->eraseFromParent();
+    return *NewInst;
+  }
+
+  // Create if-then-else structure. The original instruction is moved into else,
+  // and instructions to load virtual functions are sunk into else.
+  // Put a clone of the original instruction in the then block and set its
+  // callee to the direct function. Before:
+  //     %vtable = load ptr, ptr %object, !prof !0
+  //     %vfnaddr = getelementptr inbounds ptr, ptr %vtable, i64 1
+  //     %1 = load ptr, ptr %vfnaddr
+  //     %call = call i32 %1(ptr %object), !prof !1
+  //
+  // After:
+  //     %vtableptr = load ptr, ptr %object
+  //     %tmp0 = ptrtoint ptr %vtableptr to i64
+  //     %vtableobject = sub nuw i64 %tmp0, constant-offset
+  //     %cmp = icmp eq i64 ptrtoint(ptr @_ZTV8Derived2 to i64), %vtableobject
+  //     br i1 %cmp, label if.true.direct_vtable_target, label
+  //     if.false.orig_indirect
+  //   if.true.direct_vtable_target:
+  //     %res1 = call i32 @_ZN4Base5func2Ev(ptr %object)
+  //     br label if.end.vtable_icp
+  //   if.false.orig_indirect:
+  //     %vfnaddr = getelementptr inbounds ptr, ptr %vtable, i64 1
+  //     %vfnptr = load ptr, ptr %vfnaddr
+  //     %res2 = call i32 %vfnptr(ptr %object)
+  //     br label if.end.vtable_icp
+  //   if.end.vtable_icp:
+  //     %call = phi i32 [%res2, if.false.orig_indirect], [%res1,
+  //     if.true.direct_vtable_target]
+  Instruction *ThenTerm = nullptr;
+  Instruction *ElseTerm = nullptr;
+  BasicBlock *OrigBlock = CB.getParent();
+  SplitBlockAndInsertIfThenElse(Cond, &CB, &ThenTerm, &ElseTerm, BranchWeights);
+  BasicBlock *ThenBlock = ThenTerm->getParent();
+  BasicBlock *ElseBlock = ElseTerm->getParent();
+  BasicBlock *MergeBlock = OrigIndirectCall->getParent();
+
+  ThenBlock->setName("if.true.direct_vtable_target");
+  ElseBlock->setName("if.false.orig_indirect");
+  MergeBlock->setName("if.end.vtable_icp");
+
+  CallBase *NewInst = cast<CallBase>(OrigIndirectCall->clone());
+  OrigIndirectCall->moveBefore(ElseTerm);
+  NewInst->insertBefore(ThenTerm);
+  // Rewrite NewInst to use direct callee to sink the instructions that
+  // compute virtual function addresses.
+  NewInst->setCalledOperand(TargetFunction);
+
+  sinkInstructionsForIndirectCall(OrigBlock, ElseBlock, VTableInstr, CB);
+
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigIndirectCall)) {
+    auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+    // Invoke instructions are terminating themselves.
+    ThenTerm->eraseFromParent();
+    ElseTerm->eraseFromParent();
+
+    // Branch from the "merge" block to the original normal destination.
+    Builder.SetInsertPoint(MergeBlock);
+    Builder.CreateBr(OrigInvoke->getNormalDest());
+
+    // Fix-up phi nodes in the original invoke's normal and unwind destinations.
+    fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock);
+    fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+
+    // Now set the normal destinations of the invoke instructions to be the
+    // "merge" block.
+    OrigInvoke->setNormalDest(MergeBlock);
+    NewInvoke->setNormalDest(MergeBlock);
+  }
+
+  // create a phi node for the returned value
+  createRetPHINode(OrigIndirectCall, NewInst, MergeBlock, Builder);
+
+  return *NewInst;
+}
+
 bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
                             const char **FailureReason) {
   assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
@@ -467,12 +728,16 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
 }
 
 CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
-                            CastInst **RetBitCast) {
-  assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted");
-
-  // Set the called function of the call site to be the given callee (but don't
-  // change the type).
-  CB.setCalledOperand(Callee);
+                            CastInst **RetBitCast,
+                            bool DirectCalleeAlreadySet) {
+  if (!DirectCalleeAlreadySet) {
+    assert(!CB.getCalledFunction() &&
+           "Only indirect call sites can be promoted");
+
+    // Set the called function of the call site to be the given callee (but
+    // don't change the type).
+    CB.setCalledOperand(Callee);
+  }
 
   // Since the call site will no longer be direct, we must clear metadata that
   // is only appropriate for indirect calls. This includes !prof and !callees
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable.ll b/llvm/test/Transforms/PGOProfile/icp_vtable.ll
new file mode 100644
index 000000000000000..dc7812f61a1f543
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
+
+
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-prom -S | FileCheck %s --check-prefixes=ICALL-VTABLE
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at _ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+ at _ZTV8Derived1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+ at _ZTV8Derived2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+; @_ZTV8Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived35func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !3
+
+; one function, one vtable, same offset
+;.
+; ICALL-FUNC: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+; ICALL-FUNC: @[[_ZTV8DERIVED1:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+; ICALL-FUNC: @[[_ZTV8DERIVED2:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+;.
+; ICALL-VTABLE: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+; ICALL-VTABLE: @[[_ZTV8DERIVED1:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+; ICALL-VTABLE: @[[_ZTV8DERIVED2:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+;.
+define i32 @test_one_function_candidate(ptr %d) {
+; ICALL-FUNC-LABEL: define i32 @test_one_function_candidate(
+; ICALL-FUNC-SAME: ptr [[D:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF3:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func2Ev
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP4]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_one_function_candidate(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF3:![0-9]+]]
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; ICALL-VTABLE-NEXT:    [[VTABLE_OBJECT:%.*]] = sub nuw i64 [[TMP0]], 16
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP1]])
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_VTABLE_TARGET:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func2Ev(ptr [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[VFN]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP4]](ptr [[D]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP]]
+; ICALL-VTABLE:       if.end.vtable_icp:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_VTABLE_TARGET]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP5]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !4
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  %call = tail call i32 %1(ptr %d), !prof !5
+  ret i32 %call
+}
+
+define i32 @test_two_function_candidates(ptr %d, i32 %a) {
+; ICALL-FUNC-LABEL: define i32 @test_two_function_candidates(
+; ICALL-FUNC-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived15func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF7:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = tail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP3:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect2:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP3]]
+; ICALL-FUNC:       if.end.icp3:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP7]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_two_function_candidates(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; ICALL-VTABLE-NEXT:    [[VTABLE_OBJECT:%.*]] = sub nuw i64 [[TMP0]], 16
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP1]])
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_VTABLE_TARGET:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_VTABLE_TARGET1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF7:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target1:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = tail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP3:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect2:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP6]](ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP3]]
+; ICALL-VTABLE:       if.end.vtable_icp3:
+; ICALL-VTABLE-NEXT:    [[TMP7:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_VTABLE_TARGET1]] ]
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP]]
+; ICALL-VTABLE:       if.end.vtable_icp:
+; ICALL-VTABLE-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP7]], [[IF_END_VTABLE_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_VTABLE_TARGET]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP8]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !6
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %1 = load ptr, ptr %vtable
+  %call = tail call i32 %1(ptr %d, i32 %a), !prof !7
+  ret i32 %call
+}
+
+define i32 @test_three_function_candidates(ptr %d, i32 %a) {
+; ICALL-FUNC-LABEL: define i32 @test_three_function_candidates(
+; ICALL-FUNC-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF8:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF9:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived15func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF10:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = tail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP3:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect2:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived25func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP6]], label [[IF_TRUE_DIRECT_TARG4:%.*]], label [[IF_FALSE_ORIG_INDIRECT5:%.*]], !prof [[PROF11:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ4:
+; ICALL-FUNC-NEXT:    [[TMP7:%.*]] = tail call i32 @_ZN8Derived25func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP6:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect5:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP1]](ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP6]]
+; ICALL-FUNC:       if.end.icp6:
+; ICALL-FUNC-NEXT:    [[TMP8:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT5]] ], [ [[TMP7]], [[IF_TRUE_DIRECT_TARG4]] ]
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP3]]
+; ICALL-FUNC:       if.end.icp3:
+; ICALL-FUNC-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[IF_END_ICP6]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[IF_END_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[TMP10]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_three_function_candidates(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF8:![0-9]+]]
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; ICALL-VTABLE-NEXT:    [[VTABLE_OBJECT:%.*]] = sub nuw i64 [[TMP0]], 16
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP1]])
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_VTABLE_TARGET:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF9:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = tail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP4]], label [[IF_TRUE_DIRECT_VTABLE_TARGET1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF10:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target1:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = tail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP3:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect2:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP6]], label [[IF_TRUE_DIRECT_VTABLE_TARGET4:%.*]], label [[IF_FALSE_ORIG_INDIRECT5:%.*]], !prof [[PROF11:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target4:
+; ICALL-VTABLE-NEXT:    [[TMP7:%.*]] = tail call i32 @_ZN8Derived25func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP6:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect5:
+; ICALL-VTABLE-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = tail call i32 [[TMP8]](ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP6]]
+; ICALL-VTABLE:       if.end.vtable_icp6:
+; ICALL-VTABLE-NEXT:    [[TMP9:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT5]] ], [ [[TMP7]], [[IF_TRUE_DIRECT_VTABLE_TARGET4]] ]
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP3]]
+; ICALL-VTABLE:       if.end.vtable_icp3:
+; ICALL-VTABLE-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP9]], [[IF_END_VTABLE_ICP6]] ], [ [[TMP5]], [[IF_TRUE_DIRECT_VTABLE_TARGET1]] ]
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP]]
+; ICALL-VTABLE:       if.end.vtable_icp:
+; ICALL-VTABLE-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP10]], [[IF_END_VTABLE_ICP3]] ], [ [[TMP3]], [[IF_TRUE_DIRECT_VTABLE_TARGET]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP11]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !8
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %1 = load ptr, ptr %vtable
+  %call = tail call i32 %1(ptr %d, i32 %a), !prof !9
+  ret i32 %call
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
+define i32 @_ZN4Base5func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN4Base5func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 0
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN4Base5func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 0
+;
+  ret i32 0 ; stub callee: both unnamed arguments are ignored; always returns 0
+}
+define i32 @_ZN8Derived15func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived15func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 1
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived15func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 1
+;
+  ret i32 1 ; stub callee: both unnamed arguments are ignored; always returns 1
+}
+define i32 @_ZN8Derived25func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived25func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 2
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived25func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 2
+;
+  ret i32 2 ; stub callee: both unnamed arguments are ignored; always returns 2
+}
+define i32 @_ZN8Derived35func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived35func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 3
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived35func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 3
+;
+  ret i32 3 ; stub callee: both unnamed arguments are ignored; always returns 3
+}
+
+define i32 @_ZN4Base5func2Ev(ptr %this) {
+; ICALL-FUNC-LABEL: define i32 @_ZN4Base5func2Ev(
+; ICALL-FUNC-SAME: ptr [[THIS:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    ret i32 0
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN4Base5func2Ev(
+; ICALL-VTABLE-SAME: ptr [[THIS:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    ret i32 0
+;
+entry:
+  ret i32 0 ; stub callee: %this is ignored; always returns 0
+}
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTS8Derived1"}
+!2 = !{i64 16, !"_ZTS8Derived2"}
+!3 = !{i64 16, !"_ZTS8Derived3"}
+!4 = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+!5 = !{!"VP", i32 0, i64 1600, i64 -3104805163612457913, i64 1600}
+!6 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -9064381665493407289, i64 600}
+!7 = !{!"VP", i32 0, i64 1600, i64 -6112946599537438938, i64 1000, i64 5741628532826110430, i64 600}
+!8 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450} ; !prof on the `%vtable = load ptr, ptr %d` just above; counts 600 + 550 + 450 sum to the leading 1600
+!9 = !{!"VP", i32 0, i64 1600, i64 -6112946599537438938, i64 600, i64 5741628532826110430, i64 550, i64 8412292991472345728, i64 450} ; !prof on the indirect `tail call i32 %1(...)` just above; same 600/550/450 split
+
+; two functions, two vtables, same offset
+
+; three functions, three vtables, same offset
+;.
+; ICALL-FUNC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-FUNC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-VTABLE: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-VTABLE: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-FUNC: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-FUNC: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived1"}
+; ICALL-FUNC: [[META2:![0-9]+]] = !{i64 16, !"_ZTS8Derived2"}
+; ICALL-FUNC: [[PROF3]] = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+; ICALL-FUNC: [[PROF4]] = !{!"branch_weights", i32 1600, i32 0}
+; ICALL-FUNC: [[PROF5]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -9064381665493407289, i64 600}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 1000, i32 600}
+; ICALL-FUNC: [[PROF7]] = !{!"branch_weights", i32 600, i32 0}
+; ICALL-FUNC: [[PROF8]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450}
+; ICALL-FUNC: [[PROF9]] = !{!"branch_weights", i32 600, i32 1000}
+; ICALL-FUNC: [[PROF10]] = !{!"branch_weights", i32 550, i32 450}
+; ICALL-FUNC: [[PROF11]] = !{!"branch_weights", i32 450, i32 0}
+;.
+; ICALL-VTABLE: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-VTABLE: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived1"}
+; ICALL-VTABLE: [[META2:![0-9]+]] = !{i64 16, !"_ZTS8Derived2"}
+; ICALL-VTABLE: [[PROF3]] = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 1600, i32 0}
+; ICALL-VTABLE: [[PROF5]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -9064381665493407289, i64 600}
+; ICALL-VTABLE: [[PROF6]] = !{!"branch_weights", i32 1000, i32 600}
+; ICALL-VTABLE: [[PROF7]] = !{!"branch_weights", i32 600, i32 0}
+; ICALL-VTABLE: [[PROF8]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450}
+; ICALL-VTABLE: [[PROF9]] = !{!"branch_weights", i32 600, i32 1000}
+; ICALL-VTABLE: [[PROF10]] = !{!"branch_weights", i32 550, i32 450}
+; ICALL-VTABLE: [[PROF11]] = !{!"branch_weights", i32 450, i32 0}
+;.
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
new file mode 100644
index 000000000000000..0add5dd9aca83b1
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function _Z4testP4Base --scrub-attributes --check-globals --version 3
+
+
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-prom -S | FileCheck %s --check-prefixes=ICALL-VTABLE
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; The C++ code to generate the test case
+;
+; class Error {
+;   public:
+;     Error(const char* err_msg, int error_code);
+;     int error_code();
+; };
+
+; int get_ticket_id();
+; class Base {
+; public:
+;   virtual int get_ticket() noexcept(false) {
+;     int cnt = get_ticket_id();
+;     if (cnt != -1)
+;       return cnt;
+;
+;     throw Error("base out of tickets", 1);
+;   }
+; };
+;
+; class Derived : public Base {
+; public:
+;   virtual int get_ticket() noexcept(false) override {
+;     int cnt = get_ticket_id();
+;     if (cnt != -1)
+;       return cnt;
+;     throw Error("derived out of ticket", 2);
+;    }
+; };
+
+; int test(Base* b)  {
+;   int ret = 0;
+;   try {
+;      ret = b->get_ticket();
+;   }
+;   catch(Error e) {
+;    ret = e.error_code();
+;   }
+;   return ret;
+; }
+
+source_filename = "/app/example.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" ; NOTE(review): second datalayout in this file; an aarch64 layout is already declared right after the RUN lines -- confirm which one is intended
+target triple = "x86_64-unknown-linux-gnu" ; NOTE(review): second triple in this file; conflicts with the earlier "aarch64-unknown-linux-gnu" -- confirm which one is intended
+
+%class.Error = type { i8 }
+
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0 ; Base vtable: third slot holds Base::get_ticket
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1 ; Derived vtable: third slot holds Derived::get_ticket; carries both type ids
+@.str = private constant [22 x i8] c"derived out of ticket\00", align 1 ; passed to Error's constructor in @_ZN7Derived10get_ticketEv
+@.str.1 = private constant [20 x i8] c"base out of tickets\00", align 1 ; passed to Error's constructor in @_ZN4Base10get_ticketEv
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global [0 x ptr]
+@_ZTS5Error = constant [7 x i8] c"5Error\00"
+@_ZTI5Error = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS5Error } ; operand of the `catch` clause and of the __cxa_throw calls below
+
+;.
+; ICALL-FUNC: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0
+; ICALL-FUNC: @[[_ZTV7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1
+; ICALL-FUNC: @[[_STR:[a-zA-Z0-9_$"\\.-]+]] = private constant [22 x i8] c"derived out of ticket\00", align 1
+; ICALL-FUNC: @[[_STR_1:[a-zA-Z0-9_$"\\.-]+]] = private constant [20 x i8] c"base out of tickets\00", align 1
+; ICALL-FUNC: @[[_ZTVN10__CXXABIV117__CLASS_TYPE_INFOE:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x ptr]
+; ICALL-FUNC: @[[_ZTS5ERROR:[a-zA-Z0-9_$"\\.-]+]] = constant [7 x i8] c"5Error\00"
+; ICALL-FUNC: @[[_ZTI5ERROR:[a-zA-Z0-9_$"\\.-]+]] = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS5Error }
+;.
+; ICALL-VTABLE: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0
+; ICALL-VTABLE: @[[_ZTV7DERIVED:[a-zA-Z0-9_$"\\.-]+]] = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1
+; ICALL-VTABLE: @[[_STR:[a-zA-Z0-9_$"\\.-]+]] = private constant [22 x i8] c"derived out of ticket\00", align 1
+; ICALL-VTABLE: @[[_STR_1:[a-zA-Z0-9_$"\\.-]+]] = private constant [20 x i8] c"base out of tickets\00", align 1
+; ICALL-VTABLE: @[[_ZTVN10__CXXABIV117__CLASS_TYPE_INFOE:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x ptr]
+; ICALL-VTABLE: @[[_ZTS5ERROR:[a-zA-Z0-9_$"\\.-]+]] = constant [7 x i8] c"5Error\00"
+; ICALL-VTABLE: @[[_ZTI5ERROR:[a-zA-Z0-9_$"\\.-]+]] = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS5Error }
+;.
+define i32 @_Z4testP4Base(ptr %b) personality ptr @__gxx_personality_v0 {
+; ICALL-FUNC-LABEL: define i32 @_Z4testP4Base(
+; ICALL-FUNC-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 1
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8, !prof [[PROF2:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TEST:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TEST]])
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[TMP0]], @_ZN4Base10get_ticketEv
+; ICALL-FUNC-NEXT:    br i1 [[TMP1]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF3:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-FUNC-NEXT:    to label [[IF_END_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-FUNC:       if.false.orig_indirect:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = icmp eq ptr [[TMP0]], @_ZN7Derived10get_ticketEv
+; ICALL-FUNC-NEXT:    br i1 [[TMP3]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP4:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-FUNC-NEXT:    to label [[IF_END_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-FUNC:       if.false.orig_indirect2:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = invoke i32 [[TMP0]](ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-FUNC-NEXT:    to label [[IF_END_ICP3]] unwind label [[LPAD]]
+; ICALL-FUNC:       if.end.icp3:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP4]], [[IF_TRUE_DIRECT_TARG1]] ]
+; ICALL-FUNC-NEXT:    br label [[IF_END_ICP]]
+; ICALL-FUNC:       if.end.icp:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = phi i32 [ [[TMP5]], [[IF_END_ICP3]] ], [ [[TMP2]], [[IF_TRUE_DIRECT_TARG]] ]
+; ICALL-FUNC-NEXT:    br label [[TRY_CONT:%.*]]
+; ICALL-FUNC:       lpad:
+; ICALL-FUNC-NEXT:    [[TMP7:%.*]] = landingpad { ptr, i32 }
+; ICALL-FUNC-NEXT:    cleanup
+; ICALL-FUNC-NEXT:    catch ptr @_ZTI5Error
+; ICALL-FUNC-NEXT:    [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 1
+; ICALL-FUNC-NEXT:    [[TMP9:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
+; ICALL-FUNC-NEXT:    [[MATCHES:%.*]] = icmp eq i32 [[TMP8]], [[TMP9]]
+; ICALL-FUNC-NEXT:    br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
+; ICALL-FUNC:       catch:
+; ICALL-FUNC-NEXT:    [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0
+; ICALL-FUNC-NEXT:    call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[E]])
+; ICALL-FUNC-NEXT:    [[TMP11:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP10]])
+; ICALL-FUNC-NEXT:    [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]])
+; ICALL-FUNC-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
+; ICALL-FUNC:       invoke.cont2:
+; ICALL-FUNC-NEXT:    call void @__cxa_end_catch()
+; ICALL-FUNC-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[E]])
+; ICALL-FUNC-NEXT:    br label [[TRY_CONT]]
+; ICALL-FUNC:       try.cont:
+; ICALL-FUNC-NEXT:    [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[TMP6]], [[IF_END_ICP]] ]
+; ICALL-FUNC-NEXT:    ret i32 [[RET_0]]
+; ICALL-FUNC:       lpad1:
+; ICALL-FUNC-NEXT:    [[TMP12:%.*]] = landingpad { ptr, i32 }
+; ICALL-FUNC-NEXT:    cleanup
+; ICALL-FUNC-NEXT:    invoke void @__cxa_end_catch()
+; ICALL-FUNC-NEXT:    to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
+; ICALL-FUNC:       invoke.cont4:
+; ICALL-FUNC-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[E]])
+; ICALL-FUNC-NEXT:    br label [[EHCLEANUP]]
+; ICALL-FUNC:       ehcleanup:
+; ICALL-FUNC-NEXT:    [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP12]], [[INVOKE_CONT4]] ], [ [[TMP7]], [[LPAD]] ]
+; ICALL-FUNC-NEXT:    resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
+; ICALL-FUNC:       terminate.lpad:
+; ICALL-FUNC-NEXT:    [[TMP13:%.*]] = landingpad { ptr, i32 }
+; ICALL-FUNC-NEXT:    catch ptr null
+; ICALL-FUNC-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0
+; ICALL-FUNC-NEXT:    call void @__clang_call_terminate(ptr [[TMP14]])
+; ICALL-FUNC-NEXT:    unreachable
+;
+; ICALL-VTABLE-LABEL: define i32 @_Z4testP4Base(
+; ICALL-VTABLE-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 1
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8, !prof [[PROF2:![0-9]+]]
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; ICALL-VTABLE-NEXT:    [[VTABLE_OBJECT:%.*]] = sub nuw i64 [[TMP0]], 16
+; ICALL-VTABLE-NEXT:    [[TEST:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TEST]])
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP1]], label [[IF_TRUE_DIRECT_VTABLE_TARGET:%.*]], label [[IF_FALSE_ORIG_INDIRECT:%.*]], !prof [[PROF3:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target:
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = invoke i32 @_ZN4Base10get_ticketEv(ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-VTABLE-NEXT:    to label [[IF_END_VTABLE_ICP:%.*]] unwind label [[LPAD:%.*]]
+; ICALL-VTABLE:       if.false.orig_indirect:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV7Derived to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP3]], label [[IF_TRUE_DIRECT_VTABLE_TARGET1:%.*]], label [[IF_FALSE_ORIG_INDIRECT2:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.true.direct_vtable_target1:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = invoke i32 @_ZN7Derived10get_ticketEv(ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-VTABLE-NEXT:    to label [[IF_END_VTABLE_ICP3:%.*]] unwind label [[LPAD]]
+; ICALL-VTABLE:       if.false.orig_indirect2:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = invoke i32 [[TMP5]](ptr nonnull align 8 dereferenceable(8) [[B]])
+; ICALL-VTABLE-NEXT:    to label [[IF_END_VTABLE_ICP3]] unwind label [[LPAD]]
+; ICALL-VTABLE:       if.end.vtable_icp3:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = phi i32 [ [[CALL]], [[IF_FALSE_ORIG_INDIRECT2]] ], [ [[TMP4]], [[IF_TRUE_DIRECT_VTABLE_TARGET1]] ]
+; ICALL-VTABLE-NEXT:    br label [[IF_END_VTABLE_ICP]]
+; ICALL-VTABLE:       if.end.vtable_icp:
+; ICALL-VTABLE-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[IF_END_VTABLE_ICP3]] ], [ [[TMP2]], [[IF_TRUE_DIRECT_VTABLE_TARGET]] ]
+; ICALL-VTABLE-NEXT:    br label [[TRY_CONT:%.*]]
+; ICALL-VTABLE:       lpad:
+; ICALL-VTABLE-NEXT:    [[TMP8:%.*]] = landingpad { ptr, i32 }
+; ICALL-VTABLE-NEXT:    cleanup
+; ICALL-VTABLE-NEXT:    catch ptr @_ZTI5Error
+; ICALL-VTABLE-NEXT:    [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 1
+; ICALL-VTABLE-NEXT:    [[TMP10:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
+; ICALL-VTABLE-NEXT:    [[MATCHES:%.*]] = icmp eq i32 [[TMP9]], [[TMP10]]
+; ICALL-VTABLE-NEXT:    br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
+; ICALL-VTABLE:       catch:
+; ICALL-VTABLE-NEXT:    [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0
+; ICALL-VTABLE-NEXT:    call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[E]])
+; ICALL-VTABLE-NEXT:    [[TMP12:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP11]])
+; ICALL-VTABLE-NEXT:    [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]])
+; ICALL-VTABLE-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
+; ICALL-VTABLE:       invoke.cont2:
+; ICALL-VTABLE-NEXT:    call void @__cxa_end_catch()
+; ICALL-VTABLE-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[E]])
+; ICALL-VTABLE-NEXT:    br label [[TRY_CONT]]
+; ICALL-VTABLE:       try.cont:
+; ICALL-VTABLE-NEXT:    [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[TMP7]], [[IF_END_VTABLE_ICP]] ]
+; ICALL-VTABLE-NEXT:    ret i32 [[RET_0]]
+; ICALL-VTABLE:       lpad1:
+; ICALL-VTABLE-NEXT:    [[TMP13:%.*]] = landingpad { ptr, i32 }
+; ICALL-VTABLE-NEXT:    cleanup
+; ICALL-VTABLE-NEXT:    invoke void @__cxa_end_catch()
+; ICALL-VTABLE-NEXT:    to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
+; ICALL-VTABLE:       invoke.cont4:
+; ICALL-VTABLE-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr nonnull [[E]])
+; ICALL-VTABLE-NEXT:    br label [[EHCLEANUP]]
+; ICALL-VTABLE:       ehcleanup:
+; ICALL-VTABLE-NEXT:    [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP13]], [[INVOKE_CONT4]] ], [ [[TMP8]], [[LPAD]] ]
+; ICALL-VTABLE-NEXT:    resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
+; ICALL-VTABLE:       terminate.lpad:
+; ICALL-VTABLE-NEXT:    [[TMP14:%.*]] = landingpad { ptr, i32 }
+; ICALL-VTABLE-NEXT:    catch ptr null
+; ICALL-VTABLE-NEXT:    [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0
+; ICALL-VTABLE-NEXT:    call void @__clang_call_terminate(ptr [[TMP15]])
+; ICALL-VTABLE-NEXT:    unreachable
+;
+entry:
+  %e = alloca %class.Error, align 1
+  %vtable = load ptr, ptr %b, align 8, !prof !2 ; vtable address read from the object; !2 is the "VP" kind-2 profile for this load
+  %test = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %test)
+  %0 = load ptr, ptr %vtable, align 8 ; callee pointer read from the address held in %vtable
+  %call = invoke i32 %0(ptr nonnull align 8 dereferenceable(8) %b)
+  to label %try.cont unwind label %lpad, !prof !3 ; the indirect call site under test; !3 is its "VP" kind-0 profile
+
+lpad:
+  %1 = landingpad { ptr, i32 }
+  cleanup
+  catch ptr @_ZTI5Error
+  %2 = extractvalue { ptr, i32 } %1, 1
+  %3 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @_ZTI5Error)
+  %matches = icmp eq i32 %2, %3 ; does the landingpad selector equal the type id for @_ZTI5Error?
+  br i1 %matches, label %catch, label %ehcleanup
+
+catch:
+  %4 = extractvalue { ptr, i32 } %1, 0
+  call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %e)
+  %5 = tail call ptr @__cxa_begin_catch(ptr %4)
+  %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr %e)
+  to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+  call void @__cxa_end_catch()
+  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %e)
+  br label %try.cont
+
+try.cont:
+  %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ] ; error code on the catch path, otherwise the virtual call's result
+  ret i32 %ret.0
+
+lpad1:
+  %6 = landingpad { ptr, i32 }
+  cleanup
+  invoke void @__cxa_end_catch()
+  to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:
+  call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %e)
+  br label %ehcleanup
+
+ehcleanup:
+  %lpad.val7.merged = phi { ptr, i32 } [ %6, %invoke.cont4 ], [ %1, %lpad ]
+  resume { ptr, i32 } %lpad.val7.merged
+
+terminate.lpad:
+  %7 = landingpad { ptr, i32 }
+  catch ptr null
+  %8 = extractvalue { ptr, i32 } %7, 0
+  call void @__clang_call_terminate(ptr %8)
+  unreachable
+}
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTS8Derived"} ; NOTE(review): "Derived" has 7 characters (cf. @_ZTV7Derived, @_ZN7Derived10get_ticketEv), so this was probably meant to be "_ZTS7Derived"; the META1 CHECK lines at the end of this file repeat the same spelling
+!2 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -4576307468236080025, i64 600} ; !prof on the %vtable load in @_Z4testP4Base; counts 1000 + 600 sum to the leading 1600
+!3 = !{!"VP", i32 0, i64 1600, i64 -9184999152603961491, i64 1000, i64 -3635426779157076872, i64 600} ; !prof on the indirect invoke in @_Z4testP4Base; same 1000/600 split
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(ptr)
+declare ptr @__cxa_begin_catch(ptr)
+declare i32 @_ZN5Error10error_codeEv(ptr)
+declare void @__cxa_end_catch()
+declare void @__clang_call_terminate(ptr %0)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare  i32 @_Z13get_ticket_idv()
+declare void @_ZN5ErrorC1EPKci(ptr, ptr, i32)
+declare ptr @__cxa_allocate_exception(i64)
+declare void @__cxa_free_exception(ptr)
+declare void @__cxa_throw(ptr, ptr, ptr)
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+define i32 @_ZN4Base10get_ticketEv(ptr %this)  personality ptr @__gxx_personality_v0 {
+entry:
+  %call = tail call i32 @_Z13get_ticket_idv()
+  %cmp.not = icmp eq i32 %call, -1 ; -1 selects the throwing path (if.end)
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  ret i32 %call ; normal path: hand back the id obtained in entry
+
+if.end:                                           ; preds = %entry
+  %exception = tail call ptr @__cxa_allocate_exception(i64 1) ; 1 byte requested; %class.Error is { i8 }
+  invoke void @_ZN5ErrorC1EPKci(ptr %exception, ptr nonnull @.str.1, i32 1)
+  to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %if.end
+  tail call void @__cxa_throw(ptr nonnull %exception, ptr nonnull @_ZTI5Error, ptr null)
+  unreachable
+
+lpad:                                             ; preds = %if.end
+  %0 = landingpad { ptr, i32 }
+  cleanup
+  tail call void @__cxa_free_exception(ptr %exception) ; the @_ZN5ErrorC1EPKci invoke unwound: free the exception object, then resume
+  resume { ptr, i32 } %0
+}
+
+
+define i32 @_ZN7Derived10get_ticketEv(ptr %this) personality ptr @__gxx_personality_v0 {
+entry:
+  %call = tail call i32 @_Z13get_ticket_idv()
+  %cmp.not = icmp eq i32 %call, -1 ; -1 selects the throwing path (if.end)
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  ret i32 %call ; normal path: hand back the id obtained in entry
+
+if.end:                                           ; preds = %entry
+  %exception = tail call ptr @__cxa_allocate_exception(i64 1) ; 1 byte requested; %class.Error is { i8 }
+  invoke void @_ZN5ErrorC1EPKci(ptr %exception, ptr nonnull @.str, i32 2)
+  to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %if.end
+  tail call void @__cxa_throw(ptr nonnull %exception, ptr nonnull @_ZTI5Error, ptr null)
+  unreachable
+
+lpad:                                             ; preds = %if.end
+  %0 = landingpad { ptr, i32 }
+  cleanup
+  tail call void @__cxa_free_exception(ptr %exception) ; the @_ZN5ErrorC1EPKci invoke unwound: free the exception object, then resume
+  resume { ptr, i32 } %0
+}
+;.
+; ICALL-FUNC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+; ICALL-FUNC: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(none) }
+; ICALL-FUNC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-FUNC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-VTABLE: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+; ICALL-VTABLE: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(none) }
+; ICALL-VTABLE: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-VTABLE: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-FUNC: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-FUNC: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived"}
+; ICALL-FUNC: [[PROF2]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -4576307468236080025, i64 600}
+; ICALL-FUNC: [[PROF3]] = !{!"branch_weights", i32 1000, i32 600}
+; ICALL-FUNC: [[PROF4]] = !{!"branch_weights", i32 600, i32 0}
+;.
+; ICALL-VTABLE: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-VTABLE: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived"}
+; ICALL-VTABLE: [[PROF2]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -4576307468236080025, i64 600}
+; ICALL-VTABLE: [[PROF3]] = !{!"branch_weights", i32 1000, i32 600}
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 600, i32 0}
+;.
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_musttail.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_musttail.ll
new file mode 100644
index 000000000000000..241cd1a8fc20bfb
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_musttail.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
+
+
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-FUNC
+; RUN: opt < %s -passes=pgo-icall-prom -enable-vtable-prom -S | FileCheck %s --check-prefixes=ICALL-VTABLE
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+ at _ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+ at _ZTV8Derived1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+ at _ZTV8Derived2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+
+; three function candidates, three vtables, same offset; the indirect call is a musttail call
+
+;.
+; ICALL-FUNC: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+; ICALL-FUNC: @[[_ZTV8DERIVED1:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+; ICALL-FUNC: @[[_ZTV8DERIVED2:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+;.
+; ICALL-VTABLE: @[[_ZTV4BASE:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0
+; ICALL-VTABLE: @[[_ZTV8DERIVED1:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived15func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !1
+; ICALL-VTABLE: @[[_ZTV8DERIVED2:[a-zA-Z0-9_$"\\.-]+]] = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN8Derived25func1Ei, ptr @_ZN4Base5func2Ev] }, !type !0, !type !2
+;.
+define i32 @test_three_function_candidates(ptr %d, i32 %a) {
+; ICALL-FUNC-LABEL: define i32 @test_three_function_candidates(
+; ICALL-FUNC-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF3:![0-9]+]]
+; ICALL-FUNC-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-FUNC-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; ICALL-FUNC-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-FUNC-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @_ZN4Base5func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP2]], label [[IF_TRUE_DIRECT_TARG:%.*]], label [[TMP4:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ:
+; ICALL-FUNC-NEXT:    [[TMP3:%.*]] = musttail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    ret i32 [[TMP3]]
+; ICALL-FUNC:       4:
+; ICALL-FUNC-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived15func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP5]], label [[IF_TRUE_DIRECT_TARG1:%.*]], label [[TMP7:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ1:
+; ICALL-FUNC-NEXT:    [[TMP6:%.*]] = musttail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    ret i32 [[TMP6]]
+; ICALL-FUNC:       7:
+; ICALL-FUNC-NEXT:    [[TMP8:%.*]] = icmp eq ptr [[TMP1]], @_ZN8Derived25func1Ei
+; ICALL-FUNC-NEXT:    br i1 [[TMP8]], label [[IF_TRUE_DIRECT_TARG2:%.*]], label [[TMP10:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-FUNC:       if.true.direct_targ2:
+; ICALL-FUNC-NEXT:    [[TMP9:%.*]] = musttail call i32 @_ZN8Derived25func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    ret i32 [[TMP9]]
+; ICALL-FUNC:       10:
+; ICALL-FUNC-NEXT:    [[CALL:%.*]] = musttail call i32 [[TMP1]](ptr [[D]], i32 [[A]])
+; ICALL-FUNC-NEXT:    ret i32 [[CALL]]
+;
+; ICALL-VTABLE-LABEL: define i32 @test_three_function_candidates(
+; ICALL-VTABLE-SAME: ptr [[D:%.*]], i32 [[A:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF3:![0-9]+]]
+; ICALL-VTABLE-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[VTABLE]] to i64
+; ICALL-VTABLE-NEXT:    [[VTABLE_OBJECT:%.*]] = sub nuw i64 [[TMP0]], 16
+; ICALL-VTABLE-NEXT:    [[TMP1:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"_ZTS4Base")
+; ICALL-VTABLE-NEXT:    tail call void @llvm.assume(i1 [[TMP1]])
+; ICALL-VTABLE-NEXT:    [[TMP2:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV4Base to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP2]], label [[IF_THEN_DIRECT_VTABLE_TARG:%.*]], label [[IF_ELSE_ORIG_INDIRECT_CALL:%.*]], !prof [[PROF4:![0-9]+]]
+; ICALL-VTABLE:       if.then.direct_vtable_targ:
+; ICALL-VTABLE-NEXT:    [[TMP3:%.*]] = musttail call i32 @_ZN4Base5func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP3]]
+; ICALL-VTABLE:       if.else.orig_indirect_call:
+; ICALL-VTABLE-NEXT:    [[TMP4:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived1 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP4]], label [[IF_THEN_DIRECT_VTABLE_TARG1:%.*]], label [[IF_ELSE_ORIG_INDIRECT_CALL2:%.*]], !prof [[PROF5:![0-9]+]]
+; ICALL-VTABLE:       if.then.direct_vtable_targ1:
+; ICALL-VTABLE-NEXT:    [[TMP5:%.*]] = musttail call i32 @_ZN8Derived15func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP5]]
+; ICALL-VTABLE:       if.else.orig_indirect_call2:
+; ICALL-VTABLE-NEXT:    [[TMP6:%.*]] = icmp eq i64 ptrtoint (ptr @_ZTV8Derived2 to i64), [[VTABLE_OBJECT]]
+; ICALL-VTABLE-NEXT:    br i1 [[TMP6]], label [[IF_THEN_DIRECT_VTABLE_TARG3:%.*]], label [[IF_ELSE_ORIG_INDIRECT_CALL4:%.*]], !prof [[PROF6:![0-9]+]]
+; ICALL-VTABLE:       if.then.direct_vtable_targ3:
+; ICALL-VTABLE-NEXT:    [[TMP7:%.*]] = musttail call i32 @_ZN8Derived25func1Ei(ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    ret i32 [[TMP7]]
+; ICALL-VTABLE:       if.else.orig_indirect_call4:
+; ICALL-VTABLE-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; ICALL-VTABLE-NEXT:    [[CALL:%.*]] = musttail call i32 [[TMP8]](ptr [[D]], i32 [[A]])
+; ICALL-VTABLE-NEXT:    ret i32 [[CALL]]
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !8
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS4Base")
+  tail call void @llvm.assume(i1 %0)
+  %1 = load ptr, ptr %vtable
+  %call = musttail call i32 %1(ptr %d, i32 %a), !prof !9
+  ret i32 %call
+}
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1 noundef)
+define i32 @_ZN4Base5func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN4Base5func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 0
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN4Base5func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 0
+;
+  ret i32 0
+}
+define i32 @_ZN8Derived15func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived15func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 1
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived15func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 1
+;
+  ret i32 1
+}
+define i32 @_ZN8Derived25func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived25func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 2
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived25func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 2
+;
+  ret i32 2
+}
+define i32 @_ZN8Derived35func1Ei(ptr, i32) {
+; ICALL-FUNC-LABEL: define i32 @_ZN8Derived35func1Ei(
+; ICALL-FUNC-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-FUNC-NEXT:    ret i32 3
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN8Derived35func1Ei(
+; ICALL-VTABLE-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; ICALL-VTABLE-NEXT:    ret i32 3
+;
+  ret i32 3
+}
+
+define i32 @_ZN4Base5func2Ev(ptr %this) {
+; ICALL-FUNC-LABEL: define i32 @_ZN4Base5func2Ev(
+; ICALL-FUNC-SAME: ptr [[THIS:%.*]]) {
+; ICALL-FUNC-NEXT:  entry:
+; ICALL-FUNC-NEXT:    ret i32 0
+;
+; ICALL-VTABLE-LABEL: define i32 @_ZN4Base5func2Ev(
+; ICALL-VTABLE-SAME: ptr [[THIS:%.*]]) {
+; ICALL-VTABLE-NEXT:  entry:
+; ICALL-VTABLE-NEXT:    ret i32 0
+;
+entry:
+  ret i32 0
+}
+
+!0 = !{i64 16, !"_ZTS4Base"}
+!1 = !{i64 16, !"_ZTS8Derived1"}
+!2 = !{i64 16, !"_ZTS8Derived2"}
+!3 = !{i64 16, !"_ZTS8Derived3"}
+!4 = !{!"VP", i32 2, i64 1600, i64 5035968517245772950, i64 1600}
+!5 = !{!"VP", i32 0, i64 1600, i64 -3104805163612457913, i64 1600}
+!6 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 -9064381665493407289, i64 600}
+!7 = !{!"VP", i32 0, i64 1600, i64 -6112946599537438938, i64 1000, i64 5741628532826110430, i64 600}
+!8 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450}
+!9 = !{!"VP", i32 0, i64 1600, i64 -6112946599537438938, i64 600, i64 5741628532826110430, i64 550, i64 8412292991472345728, i64 450}
+
+; two functions, two vtables, same offset
+
+; three functions, three vtables, same offset
+;.
+; ICALL-FUNC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-FUNC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-VTABLE: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; ICALL-VTABLE: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; ICALL-FUNC: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-FUNC: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived1"}
+; ICALL-FUNC: [[META2:![0-9]+]] = !{i64 16, !"_ZTS8Derived2"}
+; ICALL-FUNC: [[PROF3]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450}
+; ICALL-FUNC: [[PROF4]] = !{!"branch_weights", i32 600, i32 1000}
+; ICALL-FUNC: [[PROF5]] = !{!"branch_weights", i32 550, i32 450}
+; ICALL-FUNC: [[PROF6]] = !{!"branch_weights", i32 450, i32 0}
+;.
+; ICALL-VTABLE: [[META0:![0-9]+]] = !{i64 16, !"_ZTS4Base"}
+; ICALL-VTABLE: [[META1:![0-9]+]] = !{i64 16, !"_ZTS8Derived1"}
+; ICALL-VTABLE: [[META2:![0-9]+]] = !{i64 16, !"_ZTS8Derived2"}
+; ICALL-VTABLE: [[PROF3]] = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 600, i64 -9064381665493407289, i64 550, i64 5035968517245772950, i64 450}
+; ICALL-VTABLE: [[PROF4]] = !{!"branch_weights", i32 600, i32 1000}
+; ICALL-VTABLE: [[PROF5]] = !{!"branch_weights", i32 550, i32 450}
+; ICALL-VTABLE: [[PROF6]] = !{!"branch_weights", i32 450, i32 0}
+;.



More information about the cfe-commits mailing list