[llvm] r367679 - [ThinLTO] Implement index-based WPD

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 06:10:52 PDT 2019


Author: tejohnson
Date: Fri Aug  2 06:10:52 2019
New Revision: 367679

URL: http://llvm.org/viewvc/llvm-project?rev=367679&view=rev
Log:
[ThinLTO] Implement index-based WPD

This patch adds support to the WholeProgramDevirt pass to perform
index-based WPD, which is invoked from ThinLTO during the thin link.

The ThinLTO backend (WPD import phase) behaves the same regardless of
whether the WPD decisions were made with the index-based or (the
existing) IR-based analysis.

Depends on D54815.

Reviewers: pcc

Subscribers: mehdi_amini, inglorion, eraman, steven_wu, dexonsmith, arphaman, dang, llvm-commits

Differential Revision: https://reviews.llvm.org/D55153

Added:
    llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll
    llvm/trunk/test/ThinLTO/X86/devirt2.ll
    llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
Modified:
    llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
    llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h
    llvm/trunk/lib/LTO/LTO.cpp
    llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
    llvm/trunk/test/ThinLTO/X86/devirt.ll
    llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp

Modified: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h (original)
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h Fri Aug  2 06:10:52 2019
@@ -632,6 +632,8 @@ public:
   /// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
   ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
 
+  void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); }
+
   /// Returns the list of type identifiers used by this function in
   /// llvm.type.test intrinsics other than by an llvm.assume intrinsic,
   /// represented as GUIDs.
@@ -1293,6 +1295,12 @@ public:
     return nullptr;
   }
 
+  TypeIdSummary *getTypeIdSummary(StringRef TypeId) {
+    return const_cast<TypeIdSummary *>(
+        static_cast<const ModuleSummaryIndex *>(this)->getTypeIdSummary(
+            TypeId));
+  }
+
   const std::map<std::string, TypeIdCompatibleVtableInfo> &
   typeIdCompatibleVtableMap() const {
     return TypeIdCompatibleVtableMap;

Modified: llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h (original)
+++ llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h Fri Aug  2 06:10:52 2019
@@ -16,8 +16,10 @@
 
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
 #include <cassert>
 #include <cstdint>
+#include <set>
 #include <utility>
 #include <vector>
 
@@ -28,6 +30,7 @@ template <typename T> class MutableArray
 class Function;
 class GlobalVariable;
 class ModuleSummaryIndex;
+struct ValueInfo;
 
 namespace wholeprogramdevirt {
 
@@ -228,6 +231,29 @@ struct WholeProgramDevirtPass : public P
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 };
 
+struct VTableSlotSummary {
+  StringRef TypeID;
+  uint64_t ByteOffset;
+};
+
+/// Perform index-based whole program devirtualization on the \p Summary
+/// index. Any devirtualized targets used by a type test in another module
+/// are added to the \p ExportedGUIDs set. For any local devirtualized targets
+/// only used within the defining module, the information necessary for
+/// locating the corresponding WPD resolution is recorded for the ValueInfo
+/// in case it is exported by cross module importing (in which case the
+/// devirtualized target name will need adjustment).
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
+/// Call after cross-module importing to update the recorded single impl
+/// devirt target names for any locals that were exported.
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H

Modified: llvm/trunk/lib/LTO/LTO.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/LTO/LTO.cpp?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/lib/LTO/LTO.cpp (original)
+++ llvm/trunk/lib/LTO/LTO.cpp Fri Aug  2 06:10:52 2019
@@ -44,6 +44,7 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
 
@@ -1274,15 +1275,28 @@ Error LTO::runThinLTO(AddStreamFn AddStr
   if (DumpThinCGSCCs)
     ThinLTO.CombinedIndex.dumpSCCs(outs());
 
+  std::set<GlobalValue::GUID> ExportedGUIDs;
+
+  // Perform index-based WPD. This will return immediately if there are
+  // no index entries in the typeIdMetadata map (e.g. if we are instead
+  // performing IR-based WPD in hybrid regular/thin LTO mode).
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
+  runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
+                               LocalWPDTargetsMap);
+
   if (Conf.OptLevel > 0)
     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                              ImportLists, ExportLists);
 
+  // Update local devirtualized targets that were exported by cross-module
+  // importing
+  updateIndexWPDForExports(ThinLTO.CombinedIndex, ExportLists,
+                           LocalWPDTargetsMap);
+
   // Figure out which symbols need to be internalized. This also needs to happen
   // at -O0 because summary-based DCE is implemented using internalization, and
   // we must apply DCE consistently with the full LTO module in order to avoid
   // undefined references during the final link.
-  std::set<GlobalValue::GUID> ExportedGUIDs;
   for (auto &Res : GlobalResolutions) {
     // If the symbol does not have external references or it is not prevailing,
     // then not need to mark it as exported from a ThinLTO partition.

Modified: llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp Fri Aug  2 06:10:52 2019
@@ -24,12 +24,14 @@
 //   returns 0, or a single vtable's function returns 1, replace each virtual
 //   call with a comparison of the vptr against that vtable's address.
 //
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
 // During regular LTO, the pass determines the best optimization for each
 // virtual call and applies the resolutions directly to virtual calls that are
 // eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
 // - Export phase: this is run during the thin link over a single merged module
 //   that contains all vtables with !type metadata that participate in the link.
 //   The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
 //   modules. The pass applies the resolutions previously computed during the
 //   import phase to each eligible virtual call.
 //
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+//   contains a summary of all vtables with !type metadata that participate in
+//   the link. It computes a resolution for each virtual call and stores it in
+//   the type identifier summary. Only single implementation devirtualization
+//   is supported.
+// - Import phase: (same as with hybrid case above).
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@ static cl::opt<unsigned>
                 cl::desc("Maximum number of call targets per "
                          "call site to enable branch funnels"));
 
+static cl::opt<bool>
+    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+                       cl::init(false), cl::ZeroOrMore,
+                       cl::desc("Print index-based devirtualization messages"));
+
 // Find the minimum offset that we may store a value of size Size bits at. If
 // IsAfter is set, look for an offset before the object, otherwise look for an
 // offset after the object.
@@ -265,6 +280,25 @@ template <> struct DenseMapInfo<VTableSl
   }
 };
 
+template <> struct DenseMapInfo<VTableSlotSummary> {
+  static VTableSlotSummary getEmptyKey() {
+    return {DenseMapInfo<StringRef>::getEmptyKey(),
+            DenseMapInfo<uint64_t>::getEmptyKey()};
+  }
+  static VTableSlotSummary getTombstoneKey() {
+    return {DenseMapInfo<StringRef>::getTombstoneKey(),
+            DenseMapInfo<uint64_t>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const VTableSlotSummary &I) {
+    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+  }
+  static bool isEqual(const VTableSlotSummary &LHS,
+                      const VTableSlotSummary &RHS) {
+    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+  }
+};
+
 } // end namespace llvm
 
 namespace {
@@ -342,6 +376,7 @@ struct CallSiteInfo {
   /// pass the vector is non-empty, we will need to add a use of llvm.type.test
   /// to each of the function summaries in the vector.
   std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
 
   bool isExported() const {
     return SummaryHasTypeTestAssumeUsers ||
@@ -358,6 +393,11 @@ struct CallSiteInfo {
     AllCallSitesDevirted = false;
   }
 
+  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+    SummaryTypeTestAssumeUsers.push_back(FS);
+    markSummaryHasTypeTestAssumeUsers();
+  }
+
   void markDevirt() {
     AllCallSitesDevirted = true;
 
@@ -542,6 +582,38 @@ struct DevirtModule {
                 function_ref<DominatorTree &(Function &)> LookupDomTree);
 };
 
+struct DevirtIndex {
+  ModuleSummaryIndex &ExportSummary;
+  // The set in which to record GUIDs exported from their module by
+  // devirtualization, used by client to ensure they are not internalized.
+  std::set<GlobalValue::GUID> &ExportedGUIDs;
+  // A map in which to record the information necessary to locate the WPD
+  // resolution for local targets in case they are exported by cross module
+  // importing.
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+  DevirtIndex(
+      ModuleSummaryIndex &ExportSummary,
+      std::set<GlobalValue::GUID> &ExportedGUIDs,
+      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+        LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+                                 const TypeIdCompatibleVtableInfo TIdInfo,
+                                 uint64_t ByteOffset);
+
+  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                           VTableSlotSummary &SlotSummary,
+                           VTableSlotInfo &SlotInfo,
+                           WholeProgramDevirtResolution *Res,
+                           std::set<ValueInfo> &DevirtTargets);
+
+  void run();
+};
+
 struct WholeProgramDevirt : public ModulePass {
   static char ID;
 
@@ -632,6 +704,43 @@ PreservedAnalyses WholeProgramDevirtPass
   return PreservedAnalyses::none();
 }
 
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  for (auto &T : LocalWPDTargetsMap) {
+    auto &VI = T.first;
+    // This was enforced earlier during trySingleImplDevirt.
+    assert(VI.getSummaryList().size() == 1 &&
+           "Devirt of local target has more than one copy");
+    auto &S = VI.getSummaryList()[0];
+    const auto &ExportList = ExportLists.find(S->modulePath());
+    if (ExportList == ExportLists.end() ||
+        !ExportList->second.count(VI.getGUID()))
+      continue;
+
+    // It's been exported by a cross module import.
+    for (auto &SlotSummary : T.second) {
+      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+      assert(TIdSum);
+      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+      assert(WPDRes != TIdSum->WPDRes.end());
+      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          WPDRes->second.SingleImplName,
+          Summary.getModuleHash(S->modulePath()));
+    }
+  }
+}
+
+} // end namespace llvm
+
 bool DevirtModule::runForTesting(
     Module &M, function_ref<AAResults &(Function &)> AARGetter,
     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -766,6 +875,34 @@ bool DevirtModule::tryFindVirtualCallTar
   return !TargetsForSlot.empty();
 }
 
+bool DevirtIndex::tryFindVirtualCallTargets(
+    std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
+    uint64_t ByteOffset) {
+  for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+    // VTable initializer should have only one summary, or all copies must be
+    // linkonce/weak ODR.
+    assert(P.VTableVI.getSummaryList().size() == 1 ||
+           llvm::all_of(
+               P.VTableVI.getSummaryList(),
+               [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+                 return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+                        GlobalValue::isWeakODRLinkage(Summary->linkage());
+               }));
+    const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+    if (!P.VTableVI.getSummaryList()[0]->isLive())
+      continue;
+    for (auto VTP : VS->vTableFuncs()) {
+      if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+        continue;
+
+      TargetsForSlot.push_back(VTP.FuncVI);
+    }
+  }
+
+  // Give up if we couldn't find any targets.
+  return !TargetsForSlot.empty();
+}
+
 void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
                                          Constant *TheFn, bool &IsExported) {
   auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -837,6 +974,83 @@ bool DevirtModule::trySingleImplDevirt(
   return true;
 }
 
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                                      VTableSlotSummary &SlotSummary,
+                                      VTableSlotInfo &SlotInfo,
+                                      WholeProgramDevirtResolution *Res,
+                                      std::set<ValueInfo> &DevirtTargets) {
+  // See if the program contains a single implementation of this virtual
+  // function.
+  auto TheFn = TargetsForSlot[0];
+  for (auto &&Target : TargetsForSlot)
+    if (TheFn != Target)
+      return false;
+
+  // Don't devirtualize if we don't have target definition.
+  auto Size = TheFn.getSummaryList().size();
+  if (!Size)
+    return false;
+
+  // If the summary list contains multiple summaries where at least one is
+  // a local, give up, as we won't know which (possibly promoted) name to use.
+  for (auto &S : TheFn.getSummaryList())
+    if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+      return false;
+
+  // Collect functions devirtualized at least for one call site for stats.
+  if (PrintSummaryDevirt)
+    DevirtTargets.insert(TheFn);
+
+  auto &S = TheFn.getSummaryList()[0];
+  bool IsExported = false;
+
+  // Insert calls into the summary index so that the devirtualized targets
+  // are eligible for import.
+  // FIXME: Annotate type tests with hotness. For now, mark these as hot
+  // to better ensure we have the opportunity to inline them.
+  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+  auto AddCalls = [&](CallSiteInfo &CSInfo) {
+    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+      FS->addCall({TheFn, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+      FS->addCall({TheFn, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+  };
+  AddCalls(SlotInfo.CSInfo);
+  for (auto &P : SlotInfo.ConstCSInfo)
+    AddCalls(P.second);
+
+  if (IsExported)
+    ExportedGUIDs.insert(TheFn.getGUID());
+
+  // Record in summary for use in devirtualization during the ThinLTO import
+  // step.
+  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+  if (GlobalValue::isLocalLinkage(S->linkage())) {
+    if (IsExported)
+      // If target is a local function and we are exporting it by
+      // devirtualizing a call in another module, we need to record the
+      // promoted name.
+      Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+    else {
+      LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+      Res->SingleImplName = TheFn.name();
+    }
+  } else
+    Res->SingleImplName = TheFn.name();
+
+  // Name will be empty if this thin link driven off of serialized combined
+  // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
+  // legacy LTO API anyway.
+  assert(!Res->SingleImplName.empty());
+
+  return true;
+}
+
 void DevirtModule::tryICallBranchFunnel(
     MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
     WholeProgramDevirtResolution *Res, VTableSlot Slot) {
@@ -1486,8 +1700,11 @@ void DevirtModule::scanTypeCheckedLoadUs
 }
 
 void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+  if (!TypeId)
+    return;
   const TypeIdSummary *TidSummary =
-      ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+      ImportSummary->getTypeIdSummary(TypeId->getString());
   if (!TidSummary)
     return;
   auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1496,6 +1713,7 @@ void DevirtModule::importResolution(VTab
   const WholeProgramDevirtResolution &Res = ResI->second;
 
   if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+    assert(!Res.SingleImplName.empty());
     // The type of the function in the declaration is irrelevant because every
     // call site will cast it to the correct type.
     Constant *SingleImpl =
@@ -1713,7 +1931,7 @@ bool DevirtModule::run() {
       using namespace ore;
       OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                         << "devirtualized "
-                        << NV("FunctionName", F->getName()));
+                        << NV("FunctionName", DT.first));
     }
   }
 
@@ -1727,3 +1945,78 @@ bool DevirtModule::run() {
 
   return true;
 }
+
+void DevirtIndex::run() {
+  if (ExportSummary.typeIdCompatibleVtableMap().empty())
+    return;
+
+  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+  for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+  }
+
+  // Collect information from summary about which calls to try to devirtualize.
+  for (auto &P : ExportSummary) {
+    for (auto &S : P.second.SummaryList) {
+      auto *FS = dyn_cast<FunctionSummary>(S.get());
+      if (!FS)
+        continue;
+      // FIXME: Only add live functions.
+      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_test_assume_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_checked_load_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+    }
+  }
+
+  std::set<ValueInfo> DevirtTargets;
+  // For each (type, offset) pair:
+  for (auto &S : CallSlots) {
+    // Search each of the members of the type identifier for the virtual
+    // function implementation at offset S.first.ByteOffset, and add to
+    // TargetsForSlot.
+    std::vector<ValueInfo> TargetsForSlot;
+    auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+    assert(TidSummary);
+    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+                                  S.first.ByteOffset)) {
+      WholeProgramDevirtResolution *Res =
+          &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+               .WPDRes[S.first.ByteOffset];
+
+      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+                               DevirtTargets))
+        continue;
+    }
+  }
+
+  // Optionally have the thin link print message for each devirtualized
+  // function.
+  if (PrintSummaryDevirt)
+    for (const auto &DT : DevirtTargets)
+      errs() << "Devirtualized call to " << DT << "\n";
+
+  return;
+}

Added: llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll?rev=367679&view=auto
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll (added)
+++ llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll Fri Aug  2 06:10:52 2019
@@ -0,0 +1,59 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+%struct.E = type { i32 (...)** }
+
+ at _ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+ at _ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+ at _ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+ at _ZTV1E = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.E*, i32)* @_ZN1E1mEi to i8*)] }, !type !4
+
+define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1E1mEi(%struct.E* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @test2(%struct.E* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.E* %obj to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1E")
+  call void @llvm.assume(i1 %p2)
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.E*, i32)**
+  %fptr33 = load i32 (%struct.E*, i32)*, i32 (%struct.E*, i32)** %2, align 8
+
+  %call4 = tail call i32 %fptr33(%struct.E* nonnull %obj, i32 %a)
+  ret i32 %call4
+}
+
+attributes #0 = { noinline optnone }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
+!3 = !{i64 16, !"_ZTS1D"}
+!4 = !{i64 16, !"_ZTS1E"}

Modified: llvm/trunk/test/ThinLTO/X86/devirt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/devirt.ll?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/devirt.ll (original)
+++ llvm/trunk/test/ThinLTO/X86/devirt.ll Fri Aug  2 06:10:52 2019
@@ -20,10 +20,10 @@
 ; and that we generate summary information needed for index-based WPD.
 ; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG
 ; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0}
-; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi")
-; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi")
-; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi")
-; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi")
+; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi"
+; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi"
+; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi"
+; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi"
 ; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
 ; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
 ; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
@@ -33,7 +33,31 @@
 ; Type Id on _ZTV1D should have been promoted
 ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "1${{.*}}", summary: ((offset: 16, [[D]])))
 
-; TODO: Test index-based WPD one %t2.o once implemented.
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
 
 ; Legacy PM
 ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
@@ -138,10 +162,24 @@ entry:
 declare i1 @llvm.type.test(i8*, metadata)
 declare void @llvm.assume(i1)
 
-declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
-declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a)
-declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a)
-declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+; Make sure we don't inline or otherwise optimize out the direct calls.
+attributes #0 = { noinline optnone }
 
 !0 = !{i64 16, !"_ZTS1A"}
 !1 = !{i64 16, !"_ZTS1B"}

Added: llvm/trunk/test/ThinLTO/X86/devirt2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/devirt2.ll?rev=367679&view=auto
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/devirt2.ll (added)
+++ llvm/trunk/test/ThinLTO/X86/devirt2.ll Fri Aug  2 06:10:52 2019
@@ -0,0 +1,278 @@
+; REQUIRES: x86-registered-target
+
+; Test devirtualization requiring promotion of local targets.
+
+; Generate split module with summary for hybrid Thin/Regular LTO WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2.o %p/Inputs/devirt3.ll
+
+; Check that we have module flag showing splitting enabled, and that we don't
+; generate summary information needed for index-based WPD.
+; RUN: llvm-modextract -b -n=0 %t2.o -o %t2.o.0
+; RUN: llvm-dis -o - %t2.o.0 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable
+; RUN: llvm-modextract -b -n=1 %t2.o -o %t2.o.1
+; RUN: llvm-dis -o - %t2.o.1 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable
+; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1}
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt3.ll
+
+; Check that we don't have module flag when splitting not enabled for ThinLTO,
+; and that we generate summary information needed for index-based WPD.
+; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0}
+; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi"
+; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi"
+; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi"
+; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi"
+; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
+; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
+; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, [[B]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C", summary: ((offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1D", summary: ((offset: 16, [[D]])))
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; NM-INDEX1-DAG: U _ZN1A1nEi.llvm.
+; NM-INDEX1-DAG: U _ZN1E1mEi.llvm.
+; NM-INDEX1-DAG: U _ZN1D1mEi
+
+; NM-INDEX2-DAG: T _ZN1A1nEi.llvm.
+; NM-INDEX2-DAG: T _ZN1E1mEi.llvm.
+; NM-INDEX2-DAG: W _ZN1D1mEi
+; NM-INDEX2-DAG: t _ZN1B1fEi
+; NM-INDEX2-DAG: t _ZN1C1fEi
+
+; Index based WPD, distributed backends
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \
+; RUN:   -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=PRINT
+
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1A1nEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1E1mEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi)
+
+; Legacy PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t5 \
+; RUN:   -r=%t1.o,test,px \
+; RUN:   -r=%t1.o,_ZTV1B, \
+; RUN:   -r=%t1.o,_ZTV1C, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,test2, \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZN1E1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B, \
+; RUN:   -r=%t2.o,_ZTV1C, \
+; RUN:   -r=%t2.o,_ZTV1D, \
+; RUN:   -r=%t2.o,_ZTV1E, \
+; RUN:   -r=%t2.o,test2,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi, \
+; RUN:   -r=%t2.o,_ZN1B1fEi, \
+; RUN:   -r=%t2.o,_ZN1C1fEi, \
+; RUN:   -r=%t2.o,_ZN1D1mEi, \
+; RUN:   -r=%t2.o,_ZN1E1mEi, \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px \
+; RUN:   -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; New PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t5 \
+; RUN:   -r=%t1.o,test,px \
+; RUN:   -r=%t1.o,_ZTV1B, \
+; RUN:   -r=%t1.o,_ZTV1C, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,test2, \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZN1E1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B, \
+; RUN:   -r=%t2.o,_ZTV1C, \
+; RUN:   -r=%t2.o,_ZTV1D, \
+; RUN:   -r=%t2.o,_ZTV1E, \
+; RUN:   -r=%t2.o,test2,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi, \
+; RUN:   -r=%t2.o,_ZN1B1fEi, \
+; RUN:   -r=%t2.o,_ZN1C1fEi, \
+; RUN:   -r=%t2.o,_ZN1D1mEi, \
+; RUN:   -r=%t2.o,_ZN1E1mEi, \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px \
+; RUN:   -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; NM-HYBRID1-DAG: U _ZN1A1nEi$
+; NM-HYBRID1-DAG: U _ZN1E1mEi$
+; NM-HYBRID1-DAG: U _ZN1D1mEi
+
+; NM-HYBRID2-DAG: T _ZN1A1nEi$
+; NM-HYBRID2-DAG: T _ZN1E1mEi$
+; NM-HYBRID2-DAG: W _ZN1D1mEi
+; NM-HYBRID2-DAG: T _ZN1B1fEi
+; NM-HYBRID2-DAG: T _ZN1C1fEi
+
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
+; We should devirt call to _ZN1E1mEi once in importing module and once
+; in original (exporting) module.
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+%struct.E = type { i32 (...)** }
+
+ at _ZTV1B = external constant [4 x i8*]
+ at _ZTV1C = external constant [4 x i8*]
+;@_ZTV1D = external constant [3 x i8*]
+ at _ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0
+}
+
+; CHECK-IR1-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, %struct.D* %obj2, %struct.E* %obj3, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized. Ignore extra character before
+  ; symbol name which would happen if it was promoted during module
+  ; splitting for hybrid WPD.
+  ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @{{.*}}_ZN1A1nEi
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
+  %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR1: %call3 = tail call i32 %fptr22
+  %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
+
+  %4 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %4
+  %5 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %5, metadata !"_ZTS1D")
+  call void @llvm.assume(i1 %p2)
+
+  %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR1: %call4 = tail call i32 @_ZN1D1mEi
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
+
+  %call5 = tail call i32 @test2(%struct.E* nonnull %obj3, i32 %call4)
+  ret i32 %call5
+}
+; CHECK-IR1-LABEL: ret i32
+; CHECK-IR1-LABEL: }
+
+; CHECK-IR2: define i32 @test2
+; CHECK-IR2-NEXT: entry:
+; Check that the call was devirtualized. Ignore extra character before
+; symbol name which would happen if it was promoted during module
+; splitting for hybrid WPD.
+; CHECK-IR2-NEXT:   %call4 = tail call i32 @{{.*}}_ZN1E1mEi
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare i32 @test2(%struct.E* %obj, i32 %a)
+
+attributes #0 = { noinline optnone }
+
+!3 = !{i64 16, !"_ZTS1D"}

Added: llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll?rev=367679&view=auto
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll (added)
+++ llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll Fri Aug  2 06:10:52 2019
@@ -0,0 +1,66 @@
+; REQUIRES: x86-registered-target
+
+; Test that index-only devirtualization handles and ignores any
+; type metadata that could not be summarized (because it was internal
+; and could not be promoted due to the fact that the module has
+; no external symbols and therefore could not be assigned a unique
+; identifier). In this case we should simply not get the type
+; metadata summary entries, and no promotion will occur.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2.o %s
+
+; Check that we don't have module flag when splitting not enabled for ThinLTO,
+; and that we generate summary information needed for index-based WPD.
+; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=DIS
+; DIS-NOT: typeIdInfo
+; DIS-NOT: typeidMetadata
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,plx \
+; RUN:   -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,plx \
+; RUN:   -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.D = type { i32 (...)** }
+
+ at _ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+; CHECK-IR-LABEL: define weak_odr dso_local i32 @test
+define weak_odr i32 @test(%struct.D* %obj2, i32 %a) {
+entry:
+  %0 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !4)
+  call void @llvm.assume(i1 %p2)
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8
+
+  ; Check that the call was not devirtualized.
+  ; CHECK-IR: %call4 = tail call i32 %fptr33
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 0)
+  ret i32 %call4
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+
+!3 = !{i64 16, !4}
+!4 = distinct !{}

Modified: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp?rev=367679&r1=367678&r2=367679&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp (original)
+++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp Fri Aug  2 06:10:52 2019
@@ -291,6 +291,14 @@ static int run(int argc, char **argv) {
     std::vector<SymbolResolution> Res;
     for (const InputFile::Symbol &Sym : Input->symbols()) {
       auto I = CommandLineResolutions.find({F, Sym.getName()});
+      // If it isn't found, look for "$", which would have been added
+      // (followed by a hash) when the symbol was promoted during module
+      // splitting if it was defined in one part and used in the other.
+      // Try looking up the symbol name before the "$".
+      if (I == CommandLineResolutions.end()) {
+        auto SplitName = Sym.getName().rsplit("$");
+        I = CommandLineResolutions.find({F, SplitName.first});
+      }
       if (I == CommandLineResolutions.end()) {
         llvm::errs() << argv[0] << ": missing symbol resolution for " << F
                      << ',' << Sym.getName() << '\n';




More information about the llvm-commits mailing list