[llvm-branch-commits] [llvm] [nfc][ThinLTO] Generate import status in per-module combined summary (PR #88024)

Mingming Liu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu May 2 16:16:36 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/88024

>From cfb63d775d43a28b560d938346f1dd4b2dddc765 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 4 Apr 2024 11:54:17 -0700
Subject: [PATCH 1/5] function import changes

---
 llvm/include/llvm/IR/ModuleSummaryIndex.h     |  24 ++++
 .../llvm/Transforms/IPO/FunctionImport.h      |  18 ++-
 llvm/lib/LTO/LTO.cpp                          |  13 +-
 llvm/lib/LTO/LTOBackend.cpp                   |   5 +-
 llvm/lib/LTO/ThinLTOCodeGenerator.cpp         |   9 +-
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 130 ++++++++++++------
 llvm/tools/llvm-link/llvm-link.cpp            |   2 +-
 7 files changed, 146 insertions(+), 55 deletions(-)

diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 286b51bda0e2c1..259fe56ce5f63e 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -296,6 +296,30 @@ template <> struct DenseMapInfo<ValueInfo> {
   static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
 };
 
+struct SummaryImportInfo {
+  enum class ImportType : uint8_t {
+    NotImported = 0,
+    Declaration = 1,
+    Definition = 2,
+  };
+  unsigned Type : 3;
+  SummaryImportInfo() : Type(static_cast<unsigned>(ImportType::NotImported)) {}
+  SummaryImportInfo(ImportType Type) : Type(static_cast<unsigned>(Type)) {}
+
+  // FIXME: delete the first two set* helper function.
+  void updateType(ImportType InputType) {
+    Type = std::max(Type, static_cast<unsigned>(InputType));
+  }
+
+  bool isDefinition() const {
+    return static_cast<ImportType>(Type) == ImportType::Definition;
+  }
+
+  bool isDeclaration() const {
+    return static_cast<ImportType>(Type) == ImportType::Declaration;
+  }
+};
+
 /// Summary of memprof callsite metadata.
 struct CallsiteInfo {
   // Actual callee function.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index c4d19e8641eca2..9adc0c31eed439 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -33,7 +33,14 @@ class FunctionImporter {
 public:
   /// Set of functions to import from a source module. Each entry is a set
   /// containing all the GUIDs of all functions to import for a source module.
-  using FunctionsToImportTy = std::unordered_set<GlobalValue::GUID>;
+  using FunctionsToImportTy = DenseMap<GlobalValue::GUID, SummaryImportInfo>;
+
+  // FIXME: Remove this.
+  enum ImportStatus {
+    NotImported,
+    ImportDeclaration,
+    ImportDefinition,
+  };
 
   /// The different reasons selectCallee will chose not to import a
   /// candidate.
@@ -99,8 +106,10 @@ class FunctionImporter {
   /// index's module path string table).
   using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;
 
-  /// The set contains an entry for every global value the module exports.
-  using ExportSetTy = DenseSet<ValueInfo>;
+  /// The map contains an entry for every global value the module exports, the
+  /// key being the value info, and the value is the summary-based import info.
+  /// FIXME: Does this set need to be a map?
+  using ExportSetTy = DenseMap<ValueInfo, SummaryImportInfo>;
 
   /// A function of this type is used to load modules referenced by the index.
   using ModuleLoaderTy =
@@ -211,7 +220,8 @@ void gatherImportedSummariesForModule(
     StringRef ModulePath,
     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
     const FunctionImporter::ImportMapTy &ImportList,
-    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
+    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+    ModuleToGVSummaryPtrSet &ModuleToDecSummaries);
 
 /// Emit into \p OutputFilename the files module \p ModulePath will import from.
 std::error_code EmitImportsFiles(
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 53060df7f503e0..ace533fe28c92f 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -159,7 +159,7 @@ void llvm::computeLTOCacheKey(
   std::vector<uint64_t> ExportsGUID;
   ExportsGUID.reserve(ExportList.size());
   for (const auto &VI : ExportList) {
-    auto GUID = VI.getGUID();
+    auto GUID = VI.first.getGUID();
     ExportsGUID.push_back(GUID);
   }
 
@@ -205,7 +205,7 @@ void llvm::computeLTOCacheKey(
 
     AddUint64(Entry.getFunctions().size());
     for (auto &Fn : Entry.getFunctions())
-      AddUint64(Fn);
+      AddUint64(Fn.first);
   }
 
   // Include the hash for the resolved ODR.
@@ -277,7 +277,7 @@ void llvm::computeLTOCacheKey(
   for (const ImportModule &ImpM : ImportModulesVector)
     for (auto &ImpF : ImpM.getFunctions()) {
       GlobalValueSummary *S =
-          Index.findSummaryInModule(ImpF, ImpM.getIdentifier());
+          Index.findSummaryInModule(ImpF.first, ImpM.getIdentifier());
       AddUsedThings(S);
       // If this is an alias, we also care about any types/etc. that the aliasee
       // may reference.
@@ -1389,15 +1389,18 @@ class lto::ThinBackendProc {
                   llvm::StringRef ModulePath,
                   const std::string &NewModulePath) {
     std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+    ModuleToGVSummaryPtrSet ModuleToDeclarationSummaries;
     std::error_code EC;
     gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
-                                     ImportList, ModuleToSummariesForIndex);
+                                     ImportList, ModuleToSummariesForIndex,
+                                     ModuleToDeclarationSummaries);
 
     raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
                       sys::fs::OpenFlags::OF_None);
     if (EC)
       return errorCodeToError(EC);
-    writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+    writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
+                     &ModuleToDeclarationSummaries);
 
     if (ShouldEmitImportsFiles) {
       EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 71e8849dc3cc91..6bb514d3d24e70 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -716,7 +716,10 @@ bool lto::initImportList(const Module &M,
       if (Summary->modulePath() == M.getModuleIdentifier())
         continue;
       // Add an entry to provoke importing by thinBackend.
-      ImportList[Summary->modulePath()].insert(GUID);
+      ImportList[Summary->modulePath()][GUID].updateType(
+          Summary->flags().ImportAsDec
+              ? SummaryImportInfo::ImportType::Declaration
+              : SummaryImportInfo::ImportType::Definition);
     }
   }
   return true;
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 8f517eb50dc76f..75aecc95e789fc 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -794,9 +794,11 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
                            IsPrevailing(PrevailingCopy), ImportLists,
                            ExportLists);
 
+  ModuleToGVSummaryPtrSet ModuleToDecSummaries;
   llvm::gatherImportedSummariesForModule(
       ModuleIdentifier, ModuleToDefinedGVSummaries,
-      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
+      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex,
+      ModuleToDecSummaries);
 }
 
 /**
@@ -833,9 +835,12 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
                            ExportLists);
 
   std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+  // FIXME: Pass on `ModuleToDecSummaries` to `EmitImportFiles` below.
+  ModuleToGVSummaryPtrSet ModuleToDecSummaries;
   llvm::gatherImportedSummariesForModule(
       ModuleIdentifier, ModuleToDefinedGVSummaries,
-      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
+      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex,
+      ModuleToDecSummaries);
 
   std::error_code EC;
   if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 68f9799616ae6d..042fc285128b14 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -358,17 +358,23 @@ class GlobalsImporter final {
         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
             LocalNotInModule(GVS))
           continue;
-        auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
+        auto [Iter, Inserted] =
+            ImportList[RefSummary->modulePath()].try_emplace(
+                VI.getGUID(),
+                SummaryImportInfo(SummaryImportInfo::ImportType::Definition));
         // Only update stat and exports if we haven't already imported this
         // variable.
-        if (!ILI.second)
+        if (!Inserted) {
+          Iter->second.updateType(SummaryImportInfo::ImportType::Definition);
           break;
+        }
         NumImportedGlobalVarsThinLink++;
         // Any references made by this variable will be marked exported
         // later, in ComputeCrossModuleImport, after import decisions are
         // complete, which is more efficient than adding them here.
         if (ExportLists)
-          (*ExportLists)[RefSummary->modulePath()].insert(VI);
+          (*ExportLists)[RefSummary->modulePath()][VI].updateType(
+              SummaryImportInfo::ImportType::Definition);
 
         // If variable is not writeonly we attempt to recursively analyze
         // its references in order to import referenced constants.
@@ -545,10 +551,12 @@ class WorkloadImportsManager : public ModuleImportsManager {
       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
                         << ExportingModule << " : "
                         << Function::getGUID(VI.name()) << "\n");
-      ImportList[ExportingModule].insert(VI.getGUID());
+      ImportList[ExportingModule][VI.getGUID()].updateType(
+          SummaryImportInfo::ImportType::Definition);
       GVI.onImportingSummary(*GVS);
       if (ExportLists)
-        (*ExportLists)[ExportingModule].insert(VI);
+        (*ExportLists)[ExportingModule][VI].updateType(
+            SummaryImportInfo::ImportType::Definition);
     }
     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
   }
@@ -816,23 +824,27 @@ static void computeImportForFunction(
              "selectCallee() didn't honor the threshold");
 
       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
-      auto ILI = ImportList[ExportModulePath].insert(VI.getGUID());
+      auto [Iter, Inserted] =
+          ImportList[ExportModulePath].insert(std::make_pair(
+              VI.getGUID(),
+              SummaryImportInfo(SummaryImportInfo::ImportType::Definition)));
       // We previously decided to import this GUID definition if it was already
       // inserted in the set of imports from the exporting module.
-      bool PreviouslyImported = !ILI.second;
-      if (!PreviouslyImported) {
+      if (Inserted) {
         NumImportedFunctionsThinLink++;
         if (IsHotCallsite)
           NumImportedHotFunctionsThinLink++;
         if (IsCriticalCallsite)
           NumImportedCriticalFunctionsThinLink++;
-      }
+      } else
+        Iter->second.updateType(SummaryImportInfo::ImportType::Definition);
 
       // Any calls/references made by this function will be marked exported
       // later, in ComputeCrossModuleImport, after import decisions are
       // complete, which is more efficient than adding them here.
       if (ExportLists)
-        (*ExportLists)[ExportModulePath].insert(VI);
+        (*ExportLists)[ExportModulePath][VI].updateType(
+            SummaryImportInfo::ImportType::Definition);
     }
 
     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -942,9 +954,11 @@ template <class T>
 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                                       T &Cont) {
   unsigned NumGVS = 0;
-  for (auto &V : Cont)
-    if (isGlobalVarSummary(Index, V))
+  for (auto &[GUID, Type] : Cont) {
+    assert(Type.isDefinition() && "Declaration type doesn't exist yet");
+    if (isGlobalVarSummary(Index, GUID))
       ++NumGVS;
+  }
   return NumGVS;
 }
 #endif
@@ -959,8 +973,8 @@ static bool checkVariableImport(
 
   for (auto &ImportPerModule : ImportLists)
     for (auto &ExportPerModule : ImportPerModule.second)
-      FlattenedImports.insert(ExportPerModule.second.begin(),
-                              ExportPerModule.second.end());
+      for (auto &[GUID, Type] : ExportPerModule.second)
+        FlattenedImports.insert(GUID);
 
   // Checks that all GUIDs of read/writeonly vars we see in export lists
   // are also in the import lists. Otherwise we my face linker undefs,
@@ -979,7 +993,7 @@ static bool checkVariableImport(
   };
 
   for (auto &ExportPerModule : ExportLists)
-    for (auto &VI : ExportPerModule.second)
+    for (auto &[VI, Unused] : ExportPerModule.second)
       if (!FlattenedImports.count(VI.getGUID()) &&
           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
         return false;
@@ -1015,7 +1029,8 @@ void llvm::ComputeCrossModuleImport(
     FunctionImporter::ExportSetTy NewExports;
     const auto &DefinedGVSummaries =
         ModuleToDefinedGVSummaries.lookup(ELI.first);
-    for (auto &EI : ELI.second) {
+    for (auto &[EI, Type] : ELI.second) {
+      assert(Type.isDefinition() && "Declaration type doesn't exist yet");
       // Find the copy defined in the exporting module so that we can mark the
       // values it references in that specific definition as exported.
       // Below we will add all references and called values, without regard to
@@ -1035,13 +1050,16 @@ void llvm::ComputeCrossModuleImport(
         // See processGlobalForThinLTO.
         if (!Index.isWriteOnly(GVS))
           for (const auto &VI : GVS->refs())
-            NewExports.insert(VI);
+            NewExports[VI].updateType(
+                SummaryImportInfo::ImportType::Declaration);
       } else {
         auto *FS = cast<FunctionSummary>(S);
         for (const auto &Edge : FS->calls())
-          NewExports.insert(Edge.first);
+          NewExports[Edge.first].updateType(
+              SummaryImportInfo::ImportType::Declaration);
         for (const auto &Ref : FS->refs())
-          NewExports.insert(Ref);
+          NewExports[Ref].updateType(
+              SummaryImportInfo::ImportType::Declaration);
       }
     }
     // Prune list computed above to only include values defined in the exporting
@@ -1049,7 +1067,7 @@ void llvm::ComputeCrossModuleImport(
     // ref/call target multiple times in above loop, and it is more efficient to
     // avoid a set lookup each time.
     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
-      if (!DefinedGVSummaries.count(EI->getGUID()))
+      if (!DefinedGVSummaries.count(EI->first.getGUID()))
         NewExports.erase(EI++);
       else
         ++EI;
@@ -1149,7 +1167,10 @@ static void ComputeCrossModuleImportForModuleFromIndexForTest(
     if (Summary->modulePath() == ModulePath)
       continue;
     // Add an entry to provoke importing by thinBackend.
-    ImportList[Summary->modulePath()].insert(GUID);
+    ImportList[Summary->modulePath()][GUID].updateType(
+        Summary->flags().ImportAsDec
+            ? SummaryImportInfo::ImportType::Declaration
+            : SummaryImportInfo::ImportType::Definition);
   }
 #ifndef NDEBUG
   dumpImportListForModule(Index, ModulePath, ImportList);
@@ -1332,20 +1353,25 @@ void llvm::gatherImportedSummariesForModule(
     StringRef ModulePath,
     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
     const FunctionImporter::ImportMapTy &ImportList,
-    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
+    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+    ModuleToGVSummaryPtrSet &ModuleToDecSummaries) {
   // Include all summaries from the importing module.
   ModuleToSummariesForIndex[std::string(ModulePath)] =
       ModuleToDefinedGVSummaries.lookup(ModulePath);
   // Include summaries for imports.
   for (const auto &ILI : ImportList) {
-    auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
+    std::string ModulePath(ILI.first);
+    auto &SummariesForIndex = ModuleToSummariesForIndex[ModulePath];
+    auto &DecSummaries = ModuleToDecSummaries[ModulePath];
     const auto &DefinedGVSummaries =
         ModuleToDefinedGVSummaries.lookup(ILI.first);
-    for (const auto &GI : ILI.second) {
-      const auto &DS = DefinedGVSummaries.find(GI);
+    for (const auto &[GUID, Type] : ILI.second) {
+      const auto &DS = DefinedGVSummaries.find(GUID);
       assert(DS != DefinedGVSummaries.end() &&
              "Expected a defined summary for imported global value");
-      SummariesForIndex[GI] = DS->second;
+      SummariesForIndex[GUID] = DS->second;
+      if (Type.isDeclaration())
+        DecSummaries.insert(DS->second);
     }
   }
 }
@@ -1617,6 +1643,16 @@ Expected<bool> FunctionImporter::importFunctions(
   for (const auto &FunctionsToImportPerModule : ImportList) {
     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
   }
+
+  auto getImportStatus = [&](const FunctionsToImportTy &GUIDToImportType,
+                             GlobalValue::GUID GUID) -> ImportStatus {
+    auto Iter = GUIDToImportType.find(GUID);
+    if (Iter == GUIDToImportType.end())
+      return ImportStatus::NotImported;
+    return Iter->second.isDefinition() ? ImportStatus::ImportDefinition
+                                       : ImportStatus::ImportDeclaration;
+  };
+
   for (const auto &Name : ModuleNameOrderedList) {
     // Get the module for the import
     const auto &FunctionsToImportPerModule = ImportList.find(Name);
@@ -1634,17 +1670,21 @@ Expected<bool> FunctionImporter::importFunctions(
       return std::move(Err);
 
     auto &ImportGUIDs = FunctionsToImportPerModule->second;
+
     // Find the globals to import
     SetVector<GlobalValue *> GlobalsToImport;
+    SetVector<GlobalValue *> GlobalDeclsToImport;
     for (Function &F : *SrcModule) {
       if (!F.hasName())
         continue;
       auto GUID = F.getGUID();
-      auto Import = ImportGUIDs.count(GUID);
-      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing function "
-                        << GUID << " " << F.getName() << " from "
-                        << SrcModule->getSourceFileName() << "\n");
-      if (Import) {
+      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      const bool ImportDefinition =
+          ImportStatus == ImportStatus::ImportDefinition;
+      LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
+                        << " importing function " << GUID << " " << F.getName()
+                        << " from " << SrcModule->getSourceFileName() << "\n");
+      if (ImportDefinition) {
         if (Error Err = F.materialize())
           return std::move(Err);
         // MemProf should match function's definition and summary,
@@ -1664,17 +1704,20 @@ Expected<bool> FunctionImporter::importFunctions(
                                          SrcModule->getSourceFileName())}));
         }
         GlobalsToImport.insert(&F);
-      }
+      } else if (ImportStatus == ImportDeclaration)
+        GlobalDeclsToImport.insert(&F);
     }
     for (GlobalVariable &GV : SrcModule->globals()) {
       if (!GV.hasName())
         continue;
       auto GUID = GV.getGUID();
-      auto Import = ImportGUIDs.count(GUID);
-      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing global "
-                        << GUID << " " << GV.getName() << " from "
-                        << SrcModule->getSourceFileName() << "\n");
-      if (Import) {
+      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      const bool ImportDefinition =
+          (ImportStatus == ImportStatus::ImportDefinition);
+      LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
+                        << " importing global " << GUID << " " << GV.getName()
+                        << " from " << SrcModule->getSourceFileName() << "\n");
+      if (ImportDefinition) {
         if (Error Err = GV.materialize())
           return std::move(Err);
         ImportedGVCount += GlobalsToImport.insert(&GV);
@@ -1684,11 +1727,13 @@ Expected<bool> FunctionImporter::importFunctions(
       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
         continue;
       auto GUID = GA.getGUID();
-      auto Import = ImportGUIDs.count(GUID);
-      LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing alias "
-                        << GUID << " " << GA.getName() << " from "
-                        << SrcModule->getSourceFileName() << "\n");
-      if (Import) {
+      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      const bool ImportDefinition =
+          (ImportStatus == ImportStatus::ImportDefinition);
+      LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
+                        << " importing alias " << GUID << " " << GA.getName()
+                        << " from " << SrcModule->getSourceFileName() << "\n");
+      if (ImportDefinition) {
         if (Error Err = GA.materialize())
           return std::move(Err);
         // Import alias as a copy of its aliasee.
@@ -1738,6 +1783,7 @@ Expected<bool> FunctionImporter::importFunctions(
                << " from " << SrcModule->getSourceFileName() << "\n";
     }
 
+    // FIXME: A later change will pass on GlobalDeclsToImport to IRMover.
     if (Error Err = Mover.move(std::move(SrcModule),
                                GlobalsToImport.getArrayRef(), nullptr,
                                /*IsPerformingImport=*/true))
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 9049cb5e858002..0a60b1e3d8f5d2 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -376,7 +376,7 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
 
     auto &Entry =
         ImportList[FileNameStringCache.insert(FileName).first->getKey()];
-    Entry.insert(F->getGUID());
+    Entry[F->getGUID()].updateType(SummaryImportInfo::ImportType::Definition);
   }
   auto CachedModuleLoader = [&](StringRef Identifier) {
     return ModuleLoaderCache.takeModule(std::string(Identifier));

>From 23a3fa4d3d124a2525f20e3a58da6d5e7740e7df Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 16 Apr 2024 21:12:34 -0700
Subject: [PATCH 2/5] simplify code

---
 llvm/include/llvm/IR/ModuleSummaryIndex.h     | 30 ++-----
 .../llvm/Transforms/IPO/FunctionImport.h      | 16 ++--
 llvm/lib/LTO/LTOBackend.cpp                   | 10 ++-
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 90 +++++++++----------
 llvm/tools/llvm-link/llvm-link.cpp            |  2 +-
 5 files changed, 62 insertions(+), 86 deletions(-)

diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 7f6f7446e09620..29d13cf4f4fae6 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -296,30 +296,6 @@ template <> struct DenseMapInfo<ValueInfo> {
   static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
 };
 
-struct SummaryImportInfo {
-  enum class ImportType : uint8_t {
-    NotImported = 0,
-    Declaration = 1,
-    Definition = 2,
-  };
-  unsigned Type : 3;
-  SummaryImportInfo() : Type(static_cast<unsigned>(ImportType::NotImported)) {}
-  SummaryImportInfo(ImportType Type) : Type(static_cast<unsigned>(Type)) {}
-
-  // FIXME: delete the first two set* helper function.
-  void updateType(ImportType InputType) {
-    Type = std::max(Type, static_cast<unsigned>(InputType));
-  }
-
-  bool isDefinition() const {
-    return static_cast<ImportType>(Type) == ImportType::Definition;
-  }
-
-  bool isDeclaration() const {
-    return static_cast<ImportType>(Type) == ImportType::Declaration;
-  }
-};
-
 /// Summary of memprof callsite metadata.
 struct CallsiteInfo {
   // Actual callee function.
@@ -609,7 +585,11 @@ class GlobalValueSummary {
     return Flags.ImportType == GlobalValueSummary::ImportKind::Declaration;
   }
 
-  void setImportKind(ImportKind IK) { Flags.ImportType = IK; }
+  GlobalValueSummary::ImportKind importType() const {
+    return static_cast<ImportKind>(Flags.ImportType);
+  }
+
+  void setImportType(ImportKind IK) { Flags.ImportType = IK; }
 
   GlobalValue::VisibilityTypes getVisibility() const {
     return (GlobalValue::VisibilityTypes)Flags.Visibility;
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 9adc0c31eed439..6729fcc420a6ed 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -33,14 +33,8 @@ class FunctionImporter {
 public:
   /// Set of functions to import from a source module. Each entry is a set
   /// containing all the GUIDs of all functions to import for a source module.
-  using FunctionsToImportTy = DenseMap<GlobalValue::GUID, SummaryImportInfo>;
-
-  // FIXME: Remove this.
-  enum ImportStatus {
-    NotImported,
-    ImportDeclaration,
-    ImportDefinition,
-  };
+  using FunctionsToImportTy =
+      DenseMap<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
 
   /// The different reasons selectCallee will chose not to import a
   /// candidate.
@@ -107,9 +101,9 @@ class FunctionImporter {
   using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;
 
   /// The map contains an entry for every global value the module exports, the
-  /// key being the value info, and the value is the summary-based import info.
-  /// FIXME: Does this set need to be a map?
-  using ExportSetTy = DenseMap<ValueInfo, SummaryImportInfo>;
+  /// key being the value info, and the value indicates whether the definition
+  /// or declaration is visible to another module.
+  using ExportSetTy = DenseMap<ValueInfo, GlobalValueSummary::ImportKind>;
 
   /// A function of this type is used to load modules referenced by the index.
   using ModuleLoaderTy =
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 6bb514d3d24e70..23d75c238fc296 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -715,11 +715,13 @@ bool lto::initImportList(const Module &M,
       // e.g. record required linkage changes.
       if (Summary->modulePath() == M.getModuleIdentifier())
         continue;
+
       // Add an entry to provoke importing by thinBackend.
-      ImportList[Summary->modulePath()][GUID].updateType(
-          Summary->flags().ImportAsDec
-              ? SummaryImportInfo::ImportType::Declaration
-              : SummaryImportInfo::ImportType::Definition);
+      auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
+          GUID, Summary->importType());
+
+      if (!Inserted)
+        Iter->second = std::min(Iter->second, Summary->importType());
     }
   }
   return true;
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 042fc285128b14..1485259e642a04 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -358,14 +358,14 @@ class GlobalsImporter final {
         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
             LocalNotInModule(GVS))
           continue;
+
         auto [Iter, Inserted] =
             ImportList[RefSummary->modulePath()].try_emplace(
-                VI.getGUID(),
-                SummaryImportInfo(SummaryImportInfo::ImportType::Definition));
+                VI.getGUID(), GlobalValueSummary::Definition);
         // Only update stat and exports if we haven't already imported this
         // variable.
         if (!Inserted) {
-          Iter->second.updateType(SummaryImportInfo::ImportType::Definition);
+          Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
           break;
         }
         NumImportedGlobalVarsThinLink++;
@@ -373,8 +373,8 @@ class GlobalsImporter final {
         // later, in ComputeCrossModuleImport, after import decisions are
         // complete, which is more efficient than adding them here.
         if (ExportLists)
-          (*ExportLists)[RefSummary->modulePath()][VI].updateType(
-              SummaryImportInfo::ImportType::Definition);
+          (*ExportLists)[RefSummary->modulePath()][VI] =
+              GlobalValueSummary::Definition;
 
         // If variable is not writeonly we attempt to recursively analyze
         // its references in order to import referenced constants.
@@ -551,12 +551,11 @@ class WorkloadImportsManager : public ModuleImportsManager {
       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
                         << ExportingModule << " : "
                         << Function::getGUID(VI.name()) << "\n");
-      ImportList[ExportingModule][VI.getGUID()].updateType(
-          SummaryImportInfo::ImportType::Definition);
+      ImportList[ExportingModule][VI.getGUID()] =
+          GlobalValueSummary::Definition;
       GVI.onImportingSummary(*GVS);
       if (ExportLists)
-        (*ExportLists)[ExportingModule][VI].updateType(
-            SummaryImportInfo::ImportType::Definition);
+        (*ExportLists)[ExportingModule][VI] = GlobalValueSummary::Definition;
     }
     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
   }
@@ -824,10 +823,8 @@ static void computeImportForFunction(
              "selectCallee() didn't honor the threshold");
 
       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
-      auto [Iter, Inserted] =
-          ImportList[ExportModulePath].insert(std::make_pair(
-              VI.getGUID(),
-              SummaryImportInfo(SummaryImportInfo::ImportType::Definition)));
+      auto [Iter, Inserted] = ImportList[ExportModulePath].insert(
+          std::make_pair(VI.getGUID(), GlobalValueSummary::Definition));
       // We previously decided to import this GUID definition if it was already
       // inserted in the set of imports from the exporting module.
       if (Inserted) {
@@ -837,14 +834,13 @@ static void computeImportForFunction(
         if (IsCriticalCallsite)
           NumImportedCriticalFunctionsThinLink++;
       } else
-        Iter->second.updateType(SummaryImportInfo::ImportType::Definition);
+        Iter->second = GlobalValueSummary::Definition;
 
       // Any calls/references made by this function will be marked exported
       // later, in ComputeCrossModuleImport, after import decisions are
       // complete, which is more efficient than adding them here.
       if (ExportLists)
-        (*ExportLists)[ExportModulePath][VI].updateType(
-            SummaryImportInfo::ImportType::Definition);
+        (*ExportLists)[ExportModulePath][VI] = GlobalValueSummary::Definition;
     }
 
     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -955,7 +951,8 @@ static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                                       T &Cont) {
   unsigned NumGVS = 0;
   for (auto &[GUID, Type] : Cont) {
-    assert(Type.isDefinition() && "Declaration type doesn't exist yet");
+    assert(Type == GlobalValueSummary::Definition &&
+           "Declaration type doesn't exist yet");
     if (isGlobalVarSummary(Index, GUID))
       ++NumGVS;
   }
@@ -1030,7 +1027,8 @@ void llvm::ComputeCrossModuleImport(
     const auto &DefinedGVSummaries =
         ModuleToDefinedGVSummaries.lookup(ELI.first);
     for (auto &[EI, Type] : ELI.second) {
-      assert(Type.isDefinition() && "Declaration type doesn't exist yet");
+      assert(Type == GlobalValueSummary::Definition &&
+             "Declaration type doesn't exist yet");
       // Find the copy defined in the exporting module so that we can mark the
       // values it references in that specific definition as exported.
       // Below we will add all references and called values, without regard to
@@ -1049,17 +1047,15 @@ void llvm::ComputeCrossModuleImport(
         // we convert such variables initializers to "zeroinitializer".
         // See processGlobalForThinLTO.
         if (!Index.isWriteOnly(GVS))
-          for (const auto &VI : GVS->refs())
-            NewExports[VI].updateType(
-                SummaryImportInfo::ImportType::Declaration);
+          for (const auto &VI : GVS->refs()) {
+            NewExports.try_emplace(VI, GlobalValueSummary::Declaration);
+          }
       } else {
         auto *FS = cast<FunctionSummary>(S);
         for (const auto &Edge : FS->calls())
-          NewExports[Edge.first].updateType(
-              SummaryImportInfo::ImportType::Declaration);
+          NewExports.try_emplace(Edge.first, GlobalValueSummary::Declaration);
         for (const auto &Ref : FS->refs())
-          NewExports[Ref].updateType(
-              SummaryImportInfo::ImportType::Declaration);
+          NewExports.try_emplace(Ref, GlobalValueSummary::Declaration);
       }
     }
     // Prune list computed above to only include values defined in the exporting
@@ -1167,10 +1163,10 @@ static void ComputeCrossModuleImportForModuleFromIndexForTest(
     if (Summary->modulePath() == ModulePath)
       continue;
     // Add an entry to provoke importing by thinBackend.
-    ImportList[Summary->modulePath()][GUID].updateType(
-        Summary->flags().ImportAsDec
-            ? SummaryImportInfo::ImportType::Declaration
-            : SummaryImportInfo::ImportType::Definition);
+    auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
+        GUID, Summary->importType());
+    if (!Inserted)
+      Iter->second = std::min(Iter->second, Summary->importType());
   }
 #ifndef NDEBUG
   dumpImportListForModule(Index, ModulePath, ImportList);
@@ -1370,7 +1366,7 @@ void llvm::gatherImportedSummariesForModule(
       assert(DS != DefinedGVSummaries.end() &&
              "Expected a defined summary for imported global value");
       SummariesForIndex[GUID] = DS->second;
-      if (Type.isDeclaration())
+      if (Type == GlobalValueSummary::Declaration)
         DecSummaries.insert(DS->second);
     }
   }
@@ -1644,13 +1640,13 @@ Expected<bool> FunctionImporter::importFunctions(
     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
   }
 
-  auto getImportStatus = [&](const FunctionsToImportTy &GUIDToImportType,
-                             GlobalValue::GUID GUID) -> ImportStatus {
+  auto maybeGetImportType = [&](const FunctionsToImportTy &GUIDToImportType,
+                                GlobalValue::GUID GUID)
+      -> std::optional<GlobalValueSummary::ImportKind> {
     auto Iter = GUIDToImportType.find(GUID);
     if (Iter == GUIDToImportType.end())
-      return ImportStatus::NotImported;
-    return Iter->second.isDefinition() ? ImportStatus::ImportDefinition
-                                       : ImportStatus::ImportDeclaration;
+      return std::nullopt;
+    return Iter->second;
   };
 
   for (const auto &Name : ModuleNameOrderedList) {
@@ -1673,14 +1669,15 @@ Expected<bool> FunctionImporter::importFunctions(
 
     // Find the globals to import
     SetVector<GlobalValue *> GlobalsToImport;
-    SetVector<GlobalValue *> GlobalDeclsToImport;
     for (Function &F : *SrcModule) {
       if (!F.hasName())
         continue;
       auto GUID = F.getGUID();
-      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      auto ImportType = maybeGetImportType(ImportGUIDs, GUID);
+      if (!ImportType)
+        continue;
       const bool ImportDefinition =
-          ImportStatus == ImportStatus::ImportDefinition;
+          *ImportType == GlobalValueSummary::Definition;
       LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
                         << " importing function " << GUID << " " << F.getName()
                         << " from " << SrcModule->getSourceFileName() << "\n");
@@ -1704,16 +1701,17 @@ Expected<bool> FunctionImporter::importFunctions(
                                          SrcModule->getSourceFileName())}));
         }
         GlobalsToImport.insert(&F);
-      } else if (ImportStatus == ImportDeclaration)
-        GlobalDeclsToImport.insert(&F);
+      }
     }
     for (GlobalVariable &GV : SrcModule->globals()) {
       if (!GV.hasName())
         continue;
       auto GUID = GV.getGUID();
-      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      auto ImportType = maybeGetImportType(ImportGUIDs, GUID);
+      if (!ImportType)
+        continue;
       const bool ImportDefinition =
-          (ImportStatus == ImportStatus::ImportDefinition);
+          (*ImportType == GlobalValueSummary::Definition);
       LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
                         << " importing global " << GUID << " " << GV.getName()
                         << " from " << SrcModule->getSourceFileName() << "\n");
@@ -1727,9 +1725,12 @@ Expected<bool> FunctionImporter::importFunctions(
       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
         continue;
       auto GUID = GA.getGUID();
-      auto ImportStatus = getImportStatus(ImportGUIDs, GUID);
+      auto ImportType = maybeGetImportType(ImportGUIDs, GUID);
+      if (!ImportType)
+        continue;
+
       const bool ImportDefinition =
-          (ImportStatus == ImportStatus::ImportDefinition);
+          (*ImportType == GlobalValueSummary::Definition);
       LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
                         << " importing alias " << GUID << " " << GA.getName()
                         << " from " << SrcModule->getSourceFileName() << "\n");
@@ -1783,7 +1784,6 @@ Expected<bool> FunctionImporter::importFunctions(
                << " from " << SrcModule->getSourceFileName() << "\n";
     }
 
-    // FIXME: A later change will pass on GlobalDeclsToImport to IRMover.
     if (Error Err = Mover.move(std::move(SrcModule),
                                GlobalsToImport.getArrayRef(), nullptr,
                                /*IsPerformingImport=*/true))
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 9e28b3a217f8b0..a6bee02b1302d3 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -377,7 +377,7 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
 
     auto &Entry =
         ImportList[FileNameStringCache.insert(FileName).first->getKey()];
-    Entry[F->getGUID()].updateType(SummaryImportInfo::ImportType::Definition);
+    Entry[F->getGUID()] = GlobalValueSummary::Definition;
   }
   auto CachedModuleLoader = [&](StringRef Identifier) {
     return ModuleLoaderCache.takeModule(std::string(Identifier));

>From a0277b43faf515c3aa21b290c09e51501fa796ac Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 16 Apr 2024 23:44:18 -0700
Subject: [PATCH 3/5] Flag gate the import-declaration change.

---
 .../llvm/Transforms/IPO/FunctionImport.h      |  3 +-
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 55 +++++++++++++++++--
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 6729fcc420a6ed..fcf51ed5a4eddd 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -31,8 +31,7 @@ class Module;
 /// based on the provided summary informations.
 class FunctionImporter {
 public:
-  /// Set of functions to import from a source module. Each entry is a set
-  /// containing all the GUIDs of all functions to import for a source module.
+  /// The functions to import from a source module and their import type.
   using FunctionsToImportTy =
       DenseMap<GlobalValue::GUID, GlobalValueSummary::ImportKind>;
 
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 1485259e642a04..eb0bf36ab26d1f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -140,6 +140,16 @@ static cl::opt<bool>
     ImportAllIndex("import-all-index",
                    cl::desc("Import all external functions in index."));
 
+/// Test-only option.
+/// TODO: Implement selective import (based on combined summary analysis) to
+/// ensure the imported function has a use case in the postlink pipeline.
+/// Import each function declaration as a fallback in real build may grow map
+/// size unnecessarily in the indexing step.
+static cl::opt<bool> ImportDeclaration(
+    "import-declaration", cl::init(false), cl::Hidden,
+    cl::desc("If true, import function declaration as fallback if the function "
+             "definition is not imported."));
+
 /// Pass a workload description file - an example of workload would be the
 /// functions executed to satisfy a RPC request. A workload is defined by a root
 /// function and the list of functions that are (frequently) needed to satisfy
@@ -245,8 +255,10 @@ static auto qualifyCalleeCandidates(
 }
 
 /// Given a list of possible callee implementation for a call site, select one
-/// that fits the \p Threshold. If none are found, the Reason will give the last
-/// reason for the failure (last, in the order of CalleeSummaryList entries).
+/// that fits the \p Threshold for function definition import. If none are
+/// found, the Reason will give the last reason for the failure (last, in the
+/// order of CalleeSummaryList entries). If caller wants to select eligible
+/// summary
 ///
 /// FIXME: select "best" instead of first that fits. But what is "best"?
 /// - The smallest: more likely to be inlined.
@@ -259,24 +271,32 @@ static const GlobalValueSummary *
 selectCallee(const ModuleSummaryIndex &Index,
              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
              unsigned Threshold, StringRef CallerModulePath,
+             const GlobalValueSummary *&TooLargeOrNoInlineSummary,
              FunctionImporter::ImportFailureReason &Reason) {
+  // Records the last summary with reason noinline or too-large.
+  TooLargeOrNoInlineSummary = nullptr;
   auto QualifiedCandidates =
       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
   for (auto QualifiedValue : QualifiedCandidates) {
     Reason = QualifiedValue.first;
+    // Skip a summary if its import is not (proved to be) legal.
     if (Reason != FunctionImporter::ImportFailureReason::None)
       continue;
     auto *Summary =
         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
 
+    // Don't bother importing the definition if the chance of inlining it is
+    // not high enough (except under `--force-import-all`).
     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
         !ForceImportAll) {
+      TooLargeOrNoInlineSummary = Summary;
       Reason = FunctionImporter::ImportFailureReason::TooLarge;
       continue;
     }
 
-    // Don't bother importing if we can't inline it anyway.
+    // Don't bother importing the definition if we can't inline it anyway.
     if (Summary->fflags().NoInline && !ForceImportAll) {
+      TooLargeOrNoInlineSummary = Summary;
       Reason = FunctionImporter::ImportFailureReason::NoInline;
       continue;
     }
@@ -359,12 +379,17 @@ class GlobalsImporter final {
             LocalNotInModule(GVS))
           continue;
 
+        // If there isn't an entry for GUID, insert <GUID, Definition> pair.
+        // Otherwise, definition should take precedence  over declaration.
         auto [Iter, Inserted] =
             ImportList[RefSummary->modulePath()].try_emplace(
                 VI.getGUID(), GlobalValueSummary::Definition);
         // Only update stat and exports if we haven't already imported this
         // variable.
         if (!Inserted) {
+          // FIXME: Introduce a wrapper struct around ImportType, and provide
+          // an `updateType` method for better readability, just like how we
+          // update the hotness of a call edge.
           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
           break;
         }
@@ -776,9 +801,28 @@ static void computeImportForFunction(
       }
 
       FunctionImporter::ImportFailureReason Reason{};
-      CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
-                                   Summary.modulePath(), Reason);
+
+      // `SummaryForDeclImport` is an summary eligible for declaration import.
+      const GlobalValueSummary *SummaryForDeclImport = nullptr;
+      CalleeSummary =
+          selectCallee(Index, VI.getSummaryList(), NewThreshold,
+                       Summary.modulePath(), SummaryForDeclImport, Reason);
       if (!CalleeSummary) {
+        // There isn't a callee for definition import but one for declaration
+        // import.
+        if (ImportDeclaration && SummaryForDeclImport) {
+          StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
+
+          // Since definition takes precedence over declaration for the same VI,
+          // try emplace <VI, declaration> pair without checking insert result.
+          // If insert doesn't happen, there must be an existing entry keyed by
+          // VI.
+          if (ExportLists)
+            (*ExportLists)[DeclSourceModule].try_emplace(
+                VI, GlobalValueSummary::Declaration);
+          ImportList[DeclSourceModule].try_emplace(
+              VI.getGUID(), GlobalValueSummary::Declaration);
+        }
         // Update with new larger threshold if this was a retry (otherwise
         // we would have already inserted with NewThreshold above). Also
         // update failure info if requested.
@@ -798,6 +842,7 @@ static void computeImportForFunction(
           FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
               VI, Edge.second.getHotness(), Reason, 1);
         }
+
         if (ForceImportAll) {
           std::string Msg = std::string("Failed to import function ") +
                             VI.name().str() + " due to " +

>From 5edecccb0fb4dc4d6da54b2533484c7c176314c6 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 23 Apr 2024 09:16:57 -0700
Subject: [PATCH 4/5] stage changes

---
 llvm/lib/Transforms/IPO/FunctionImport.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index eb0bf36ab26d1f..bc139e5a905bbc 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -996,8 +996,6 @@ static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                                       T &Cont) {
   unsigned NumGVS = 0;
   for (auto &[GUID, Type] : Cont) {
-    assert(Type == GlobalValueSummary::Definition &&
-           "Declaration type doesn't exist yet");
     if (isGlobalVarSummary(Index, GUID))
       ++NumGVS;
   }
@@ -1092,9 +1090,8 @@ void llvm::ComputeCrossModuleImport(
         // we convert such variables initializers to "zeroinitializer".
         // See processGlobalForThinLTO.
         if (!Index.isWriteOnly(GVS))
-          for (const auto &VI : GVS->refs()) {
+          for (const auto &VI : GVS->refs())
             NewExports.try_emplace(VI, GlobalValueSummary::Declaration);
-          }
       } else {
         auto *FS = cast<FunctionSummary>(S);
         for (const auto &Edge : FS->calls())

>From a139e4afc02c1e9f5dfae86f2f4a540cd97d8e87 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 2 May 2024 16:15:19 -0700
Subject: [PATCH 5/5] Add test coverage for import delcaration

---
 llvm/lib/Transforms/IPO/FunctionImport.cpp    |  16 ++-
 .../ThinLTO/X86/import_callee_declaration.ll  | 111 ++++++++++++++++++
 llvm/tools/llvm-link/llvm-link.cpp            |   4 +
 3 files changed, 125 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/ThinLTO/X86/import_callee_declaration.ll

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index bc139e5a905bbc..b13770ed49d834 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -140,11 +140,12 @@ static cl::opt<bool>
     ImportAllIndex("import-all-index",
                    cl::desc("Import all external functions in index."));
 
-/// Test-only option.
+/// This is a test-only option.
+/// If this option is enabled, the ThinLTO indexing step will import each
+/// function declaration as a fallback. In a real build this may increase ram
+/// usage of the indexing step unnecessarily.
 /// TODO: Implement selective import (based on combined summary analysis) to
 /// ensure the imported function has a use case in the postlink pipeline.
-/// Import each function declaration as a fallback in real build may grow map
-/// size unnecessarily in the indexing step.
 static cl::opt<bool> ImportDeclaration(
     "import-declaration", cl::init(false), cl::Hidden,
     cl::desc("If true, import function declaration as fallback if the function "
@@ -1070,8 +1071,6 @@ void llvm::ComputeCrossModuleImport(
     const auto &DefinedGVSummaries =
         ModuleToDefinedGVSummaries.lookup(ELI.first);
     for (auto &[EI, Type] : ELI.second) {
-      assert(Type == GlobalValueSummary::Definition &&
-             "Declaration type doesn't exist yet");
       // Find the copy defined in the exporting module so that we can mark the
       // values it references in that specific definition as exported.
       // Below we will add all references and called values, without regard to
@@ -1716,8 +1715,13 @@ Expected<bool> FunctionImporter::importFunctions(
         continue;
       auto GUID = F.getGUID();
       auto ImportType = maybeGetImportType(ImportGUIDs, GUID);
-      if (!ImportType)
+
+      if (!ImportType) {
+        LLVM_DEBUG(dbgs() << "Not importing function " << GUID << " "
+                          << F.getName() << " from "
+                          << SrcModule->getSourceFileName() << "\n");
         continue;
+      }
       const bool ImportDefinition =
           *ImportType == GlobalValueSummary::Definition;
       LLVM_DEBUG(dbgs() << (ImportDefinition ? "Is" : "Not")
diff --git a/llvm/test/ThinLTO/X86/import_callee_declaration.ll b/llvm/test/ThinLTO/X86/import_callee_declaration.ll
new file mode 100644
index 00000000000000..344bde1a1a07d3
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/import_callee_declaration.ll
@@ -0,0 +1,111 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+; Generate per-module summaries.
+; RUN: opt -module-summary main.ll -o main.bc
+; RUN: opt -module-summary lib.ll -o lib.bc
+
+; Generate the combined summary using per-module summaries.
+; For function import, set 'import-instr-limit' to 5 and fall back to import
+; function declarations.
+;
+; RUN: llvm-lto2 run \
+; RUN:   -import-instr-limit=5 \
+; RUN:   -import-declaration \
+; RUN:   -thinlto-distributed-indexes \
+; RUN:   -thinlto-emit-imports \
+; RUN:   -r=main.bc,main,px \
+; RUN:   -r=main.bc,small_func, \
+; RUN:   -r=main.bc,large_func, \
+; RUN:   -r=lib.bc,callee,px \
+; RUN:   -r=lib.bc,large_indirect_callee,px \
+; RUN:   -r=lib.bc,small_func,px \
+; RUN:   -r=lib.bc,large_func,px \
+; RUN:   -r=lib.bc,calleeAddrs,px -o summary main.bc lib.bc
+
+; main.ll should import {large_func, large_indirect_callee} as declarations.
+; 
+; First disassemble per-module summary and find out the GUID for {large_func, large_indirect_callee}.
+;
+; RUN: llvm-dis lib.bc -o - | FileCheck %s --check-prefix=LIB-DIS
+; LIB-DIS: [[LIBMOD:\^[0-9]+]] = module: (path: "lib.bc", hash: (0, 0, 0, 0, 0))
+; LIB-DIS: [[LARGEFUNC:\^[0-9]+]] = gv: (name: "large_func", summaries: {{.*}}) ; guid = 2418497564662708935
+; LIB-DIS: [[LARGEINDIRECT:\^[0-9]+]] = gv: (name: "large_indirect_callee", summaries: {{.*}}) ; guid = 14343440786664691134
+;
+; Secondly disassemble main's combined summary and verify the import type of
+; these two GUIDs are declaration.
+;
+; RUN: llvm-dis main.bc.thinlto.bc -o - | FileCheck %s --check-prefix=MAIN-DIS
+;
+; MAIN-DIS: [[LIBMOD:\^[0-9]+]] = module: (path: "lib.bc", hash: (0, 0, 0, 0, 0))
+; MAIN-DIS: [[LARGEFUNC:\^[0-9]+]] = gv: (guid: 2418497564662708935, summaries: (function: (module: [[LIBMOD]], flags: ({{.*}} importType: declaration), insts: 6, {{.*}})))
+; MAIN-DIS: [[LARGEINDIRECT:\^[0-9]+]] = gv: (guid: 14343440786664691134, summaries: (function: (module: [[LIBMOD]], flags: ({{.*}} importType: declaration), insts: 7, {{.*}})))
+
+; TODO: Add test coverage for function alias.
+
+;--- main.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() {
+  call void @small_func()
+  call void @large_func()
+  ret i32 0
+}
+
+declare void @small_func()
+
+; large_func without attributes
+declare void @large_func()
+
+;--- lib.ll
+source_filename = "lib.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at calleeAddrs = global [2 x ptr] [ptr @large_indirect_callee, ptr @small_indirect_callee]
+
+define void @callee() #1 {
+  ret void
+}
+
+define void @large_indirect_callee()#2 {
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  ret void
+}
+
+define internal void @small_indirect_callee() #0 {
+  ret void
+}
+
+define void @small_func() {
+entry:
+  %0 = load ptr, ptr @calleeAddrs
+  call void %0(), !prof !0
+  %1 = load ptr, ptr getelementptr inbounds ([2 x ptr], ptr @calleeAddrs, i64 0, i64 1)
+  call void %1(), !prof !1
+  ret void
+}
+
+define void @large_func() #0 {
+entry:
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  call void @callee()
+  ret void
+}
+
+attributes #0 = { nounwind norecurse }
+
+attributes #1 = { noinline }
+
+attributes #2 = { norecurse }
+
+!0 = !{!"VP", i32 0, i64 1, i64 14343440786664691134, i64 1}
+!1 = !{!"VP", i32 0, i64 2, i64 13568239288960714650, i64 1}
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index a6bee02b1302d3..3a201031351525 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -375,6 +375,10 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
     if (Verbose)
       errs() << "Importing " << FunctionName << " from " << FileName << "\n";
 
+    // `-import` specifies the `<filename,function-name>` pairs to import as
+    // definition, so make the import type definition directly.
+    // FIXME: A follow-up patch should add test coverage for import declaration
+    // in `llvm-link` CLI (e.g., by introducing a new command line option).
     auto &Entry =
         ImportList[FileNameStringCache.insert(FileName).first->getKey()];
     Entry[F->getGUID()] = GlobalValueSummary::Definition;



More information about the llvm-branch-commits mailing list