[llvm] [ThinLTO] Use a set rather than a map to track exported ValueInfos. (PR #97360)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 09:43:26 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/97360

>From 14deb75f8d82647260165ab791794d29778befa5 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 1 Jul 2024 14:14:18 -0700
Subject: [PATCH 1/3] [ThinLTO] Track definitions only in export-set

---
 .../llvm/Transforms/IPO/FunctionImport.h      | 11 ++--
 llvm/lib/LTO/LTO.cpp                          | 14 +++--
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 52 +++++++++++--------
 3 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index d8c142ec89d82..3b03ba82b9272 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -104,13 +104,10 @@ class FunctionImporter {
   /// index's module path string table).
   using ImportMapTy = DenseMap<StringRef, FunctionsToImportTy>;
 
-  /// The map contains an entry for every global value the module exports.
-  /// The key is ValueInfo, and the value indicates whether the definition
-  /// or declaration is visible to another module. If a function's definition is
-  /// visible to other modules, the global values this function referenced are
-  /// visible and shouldn't be internalized.
-  /// TODO: Rename to `ExportMapTy`.
-  using ExportSetTy = DenseMap<ValueInfo, GlobalValueSummary::ImportKind>;
+  /// The set contains an entry for every global value that the module exports.
+  /// Depending on the user context, this container is allowed to contain
+  /// definitions, declarations or a mix of both.
+  using ExportSetTy = DenseSet<ValueInfo>;
 
   /// A function of this type is used to load modules referenced by the index.
   using ModuleLoaderTy =
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 6bbec535d8e98..5382b1158cb04 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -161,19 +161,17 @@ void llvm::computeLTOCacheKey(
   auto ModHash = Index.getModuleHash(ModuleID);
   Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
 
-  std::vector<std::pair<uint64_t, uint8_t>> ExportsGUID;
+  // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
+  // used to compute cache key, we could omit hashing `ExportList` here.
+  std::vector<uint64_t> ExportsGUID;
   ExportsGUID.reserve(ExportList.size());
-  for (const auto &[VI, ExportType] : ExportList)
-    ExportsGUID.push_back(
-        std::make_pair(VI.getGUID(), static_cast<uint8_t>(ExportType)));
+  for (const auto &VI : ExportList)
+    ExportsGUID.push_back(VI.getGUID());
 
   // Sort the export list elements GUIDs.
   llvm::sort(ExportsGUID);
-  for (auto [GUID, ExportType] : ExportsGUID) {
-    // The export list can impact the internalization, be conservative here
+  for (auto GUID : ExportsGUID)
     Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
-    AddUint8(ExportType);
-  }
 
   // Include the hash for every module we import functions from. The set of
   // imported symbols for each module may affect code generation and is
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index ec5294b9512cf..f2e67aa998606 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -400,8 +400,7 @@ class GlobalsImporter final {
         // later, in ComputeCrossModuleImport, after import decisions are
         // complete, which is more efficient than adding them here.
         if (ExportLists)
-          (*ExportLists)[RefSummary->modulePath()][VI] =
-              GlobalValueSummary::Definition;
+          (*ExportLists)[RefSummary->modulePath()].insert(VI);
 
         // If variable is not writeonly we attempt to recursively analyze
         // its references in order to import referenced constants.
@@ -582,7 +581,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
           GlobalValueSummary::Definition;
       GVI.onImportingSummary(*GVS);
       if (ExportLists)
-        (*ExportLists)[ExportingModule][VI] = GlobalValueSummary::Definition;
+        (*ExportLists)[ExportingModule].insert(VI);
     }
     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
   }
@@ -818,10 +817,8 @@ static void computeImportForFunction(
           // Since definition takes precedence over declaration for the same VI,
           // try emplace <VI, declaration> pair without checking insert result.
           // If insert doesn't happen, there must be an existing entry keyed by
-          // VI.
-          if (ExportLists)
-            (*ExportLists)[DeclSourceModule].try_emplace(
-                VI, GlobalValueSummary::Declaration);
+          // VI. Note `ExportLists` only keeps track of definitions so VI won't
+          // be inserted.
           ImportList[DeclSourceModule].try_emplace(
               VI.getGUID(), GlobalValueSummary::Declaration);
         }
@@ -892,7 +889,7 @@ static void computeImportForFunction(
       // later, in ComputeCrossModuleImport, after import decisions are
       // complete, which is more efficient than adding them here.
       if (ExportLists)
-        (*ExportLists)[ExportModulePath][VI] = GlobalValueSummary::Definition;
+        (*ExportLists)[ExportModulePath].insert(VI);
     }
 
     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
@@ -998,14 +995,29 @@ static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
   return false;
 }
 
-template <class T>
-static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, T &Cont,
+static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
+                                      FunctionImporter::ExportSetTy &ExportSet,
                                       unsigned &DefinedGVS,
                                       unsigned &DefinedFS) {
+  DefinedGVS = 0;
+  DefinedFS = 0;
+  for (auto &VI : ExportSet) {
+    if (isGlobalVarSummary(Index, VI.getGUID())) {
+      ++DefinedGVS;
+    } else
+      ++DefinedFS;
+  }
+  return DefinedGVS;
+}
+
+static unsigned
+numGlobalVarSummaries(const ModuleSummaryIndex &Index,
+                      FunctionImporter::FunctionsToImportTy &ImportMap,
+                      unsigned &DefinedGVS, unsigned &DefinedFS) {
   unsigned NumGVS = 0;
   DefinedGVS = 0;
   DefinedFS = 0;
-  for (auto &[GUID, Type] : Cont) {
+  for (auto &[GUID, Type] : ImportMap) {
     if (isGlobalVarSummary(Index, GUID)) {
       if (Type == GlobalValueSummary::Definition)
         ++DefinedGVS;
@@ -1046,7 +1058,7 @@ static bool checkVariableImport(
   };
 
   for (auto &ExportPerModule : ExportLists)
-    for (auto &[VI, Unused] : ExportPerModule.second)
+    for (auto &VI : ExportPerModule.second)
       if (!FlattenedImports.count(VI.getGUID()) &&
           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
         return false;
@@ -1079,14 +1091,12 @@ void llvm::ComputeCrossModuleImport(
   // since we may import the same values multiple times into different modules
   // during the import computation.
   for (auto &ELI : ExportLists) {
+    // `NewExports` tracks the VI that gets exported because the full definition
+    // of its user/referencer gets exported.
     FunctionImporter::ExportSetTy NewExports;
     const auto &DefinedGVSummaries =
         ModuleToDefinedGVSummaries.lookup(ELI.first);
-    for (auto &[EI, Type] : ELI.second) {
-      // If a variable is exported as a declaration, its 'refs' and 'calls' are
-      // not further exported.
-      if (Type == GlobalValueSummary::Declaration)
-        continue;
+    for (auto &EI : ELI.second) {
       // Find the copy defined in the exporting module so that we can mark the
       // values it references in that specific definition as exported.
       // Below we will add all references and called values, without regard to
@@ -1108,19 +1118,19 @@ void llvm::ComputeCrossModuleImport(
           for (const auto &VI : GVS->refs()) {
             // Try to emplace the declaration entry. If a definition entry
             // already exists for key `VI`, this is a no-op.
-            NewExports.try_emplace(VI, GlobalValueSummary::Declaration);
+            NewExports.insert(VI);
           }
       } else {
         auto *FS = cast<FunctionSummary>(S);
         for (const auto &Edge : FS->calls()) {
           // Try to emplace the declaration entry. If a definition entry
           // already exists for key `VI`, this is a no-op.
-          NewExports.try_emplace(Edge.first, GlobalValueSummary::Declaration);
+          NewExports.insert(Edge.first);
         }
         for (const auto &Ref : FS->refs()) {
           // Try to emplace the declaration entry. If a definition entry
           // already exists for key `VI`, this is a no-op.
-          NewExports.try_emplace(Ref, GlobalValueSummary::Declaration);
+          NewExports.insert(Ref);
         }
       }
     }
@@ -1129,7 +1139,7 @@ void llvm::ComputeCrossModuleImport(
     // the same ref/call target multiple times in above loop, and it is more
     // efficient to avoid a set lookup each time.
     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
-      if (!DefinedGVSummaries.count(EI->first.getGUID()))
+      if (!DefinedGVSummaries.count(EI->getGUID()))
         NewExports.erase(EI++);
       else
         ++EI;

>From db822c99d0ed5da097e6df4e403f8b9652b1e206 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 2 Jul 2024 22:16:47 -0700
Subject: [PATCH 2/3] resolve feedback

---
 llvm/lib/Transforms/IPO/FunctionImport.cpp    | 78 ++++++++-----------
 llvm/test/ThinLTO/X86/funcimport-stats.ll     |  2 +-
 .../Transforms/FunctionImport/funcimport.ll   |  2 +-
 3 files changed, 34 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index f2e67aa998606..b36fd210b28cd 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -817,8 +817,8 @@ static void computeImportForFunction(
           // Since definition takes precedence over declaration for the same VI,
           // try emplace <VI, declaration> pair without checking insert result.
           // If insert doesn't happen, there must be an existing entry keyed by
-          // VI. Note `ExportLists` only keeps track of definitions so VI won't
-          // be inserted.
+          // VI. Note `ExportLists` only keeps track of exports due to imported
+          // definitions.
           ImportList[DeclSourceModule].try_emplace(
               VI.getGUID(), GlobalValueSummary::Declaration);
         }
@@ -995,34 +995,36 @@ static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
   return false;
 }
 
+// Return the number of global summaries and record the number of function
+// summaries as output parameter.
 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                                       FunctionImporter::ExportSetTy &ExportSet,
-                                      unsigned &DefinedGVS,
                                       unsigned &DefinedFS) {
-  DefinedGVS = 0;
+  unsigned NumGVS = 0;
   DefinedFS = 0;
   for (auto &VI : ExportSet) {
-    if (isGlobalVarSummary(Index, VI.getGUID())) {
-      ++DefinedGVS;
-    } else
+    if (isGlobalVarSummary(Index, VI.getGUID()))
+      ++NumGVS;
+    else
       ++DefinedFS;
   }
-  return DefinedGVS;
+  return NumGVS;
 }
 
+// Return the number of global summaries and record the number of function
+// summaries as output parameter. This is the same as `numGlobalVarSummaries`
+// except that it takes `FunctionImporter::FunctionsToImportTy` as input
+// parameter.
 static unsigned
 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                       FunctionImporter::FunctionsToImportTy &ImportMap,
-                      unsigned &DefinedGVS, unsigned &DefinedFS) {
+                      unsigned &DefinedFS) {
   unsigned NumGVS = 0;
-  DefinedGVS = 0;
   DefinedFS = 0;
   for (auto &[GUID, Type] : ImportMap) {
-    if (isGlobalVarSummary(Index, GUID)) {
-      if (Type == GlobalValueSummary::Definition)
-        ++DefinedGVS;
+    if (isGlobalVarSummary(Index, GUID))
       ++NumGVS;
-    } else if (Type == GlobalValueSummary::Definition)
+    else if (Type == GlobalValueSummary::Definition)
       ++DefinedFS;
   }
   return NumGVS;
@@ -1115,23 +1117,14 @@ void llvm::ComputeCrossModuleImport(
         // we convert such variables initializers to "zeroinitializer".
         // See processGlobalForThinLTO.
         if (!Index.isWriteOnly(GVS))
-          for (const auto &VI : GVS->refs()) {
-            // Try to emplace the declaration entry. If a definition entry
-            // already exists for key `VI`, this is a no-op.
+          for (const auto &VI : GVS->refs())
             NewExports.insert(VI);
-          }
       } else {
         auto *FS = cast<FunctionSummary>(S);
-        for (const auto &Edge : FS->calls()) {
-          // Try to emplace the declaration entry. If a definition entry
-          // already exists for key `VI`, this is a no-op.
+        for (const auto &Edge : FS->calls())
           NewExports.insert(Edge.first);
-        }
-        for (const auto &Ref : FS->refs()) {
-          // Try to emplace the declaration entry. If a definition entry
-          // already exists for key `VI`, this is a no-op.
+        for (const auto &Ref : FS->refs())
           NewExports.insert(Ref);
-        }
       }
     }
     // Prune list computed above to only include values defined in the
@@ -1154,29 +1147,25 @@ void llvm::ComputeCrossModuleImport(
   for (auto &ModuleImports : ImportLists) {
     auto ModName = ModuleImports.first;
     auto &Exports = ExportLists[ModName];
-    unsigned DefinedGVS = 0, DefinedFS = 0;
-    unsigned NumGVS =
-        numGlobalVarSummaries(Index, Exports, DefinedGVS, DefinedFS);
+    unsigned DefinedFS = 0;
+    unsigned NumGVS = numGlobalVarSummaries(Index, Exports, DefinedFS);
     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports " << DefinedFS
                       << " function as definitions, "
                       << Exports.size() - NumGVS - DefinedFS
-                      << " functions as declarations, " << DefinedGVS
-                      << " var definitions and " << NumGVS - DefinedGVS
-                      << " var declarations. Imports from "
-                      << ModuleImports.second.size() << " modules.\n");
+                      << " functions as declarations and " << NumGVS
+                      << " vars. Imports from " << ModuleImports.second.size()
+                      << " modules.\n");
     for (auto &Src : ModuleImports.second) {
       auto SrcModName = Src.first;
-      unsigned DefinedGVS = 0, DefinedFS = 0;
+      unsigned DefinedFS = 0;
       unsigned NumGVSPerMod =
-          numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
+          numGlobalVarSummaries(Index, Src.second, DefinedFS);
       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
                         << Src.second.size() - NumGVSPerMod - DefinedFS
                         << " function declarations imported from " << SrcModName
                         << "\n");
-      LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " global vars definition and "
-                        << NumGVSPerMod - DefinedGVS
-                        << " global vars declaration imported from "
-                        << SrcModName << "\n");
+      LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
+                        << " global vars imported from " << SrcModName << "\n");
     }
   }
 #endif
@@ -1190,17 +1179,14 @@ static void dumpImportListForModule(const ModuleSummaryIndex &Index,
                     << ImportList.size() << " modules.\n");
   for (auto &Src : ImportList) {
     auto SrcModName = Src.first;
-    unsigned DefinedGVS = 0, DefinedFS = 0;
-    unsigned NumGVSPerMod =
-        numGlobalVarSummaries(Index, Src.second, DefinedGVS, DefinedFS);
+    unsigned DefinedFS = 0;
+    unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
                       << Src.second.size() - DefinedFS - NumGVSPerMod
                       << " function declarations imported from " << SrcModName
                       << "\n");
-    LLVM_DEBUG(dbgs() << " - " << DefinedGVS << " var definitions and "
-                      << NumGVSPerMod - DefinedGVS
-                      << " var declarations imported from " << SrcModName
-                      << "\n");
+    LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
+                      << SrcModName << "\n");
   }
 }
 #endif
diff --git a/llvm/test/ThinLTO/X86/funcimport-stats.ll b/llvm/test/ThinLTO/X86/funcimport-stats.ll
index 7fcd33855fe1a..1c2fd092ccb49 100644
--- a/llvm/test/ThinLTO/X86/funcimport-stats.ll
+++ b/llvm/test/ThinLTO/X86/funcimport-stats.ll
@@ -10,7 +10,7 @@
 ; RUN: cat %t4 | FileCheck %s
 
 ; CHECK:      - [[NUM_FUNCS:[0-9]+]] function definitions and 0 function declarations imported from
-; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars definition and 0 global vars declaration imported from
+; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars imported from
 
 ; CHECK:      [[NUM_FUNCS]] function-import - Number of functions imported in backend
 ; CHECK-NEXT: [[NUM_FUNCS]] function-import - Number of functions thin link decided to import
diff --git a/llvm/test/Transforms/FunctionImport/funcimport.ll b/llvm/test/Transforms/FunctionImport/funcimport.ll
index 635750b33fff0..8f7e8340d4909 100644
--- a/llvm/test/Transforms/FunctionImport/funcimport.ll
+++ b/llvm/test/Transforms/FunctionImport/funcimport.ll
@@ -167,7 +167,7 @@ declare void @variadic_va_start(...)
 
 ; DUMP:       Module [[M1:.*]] imports from 1 module
 ; DUMP-NEXT:  15 function definitions and 0 function declarations imported from [[M2:.*]]
-; DUMP-NEXT:  4 var definitions and 0 var declarations imported from [[M2]]
+; DUMP-NEXT:  4 vars imported from [[M2]]
 
 ; DUMP:       Imported 15 functions for Module [[M1]]
 ; DUMP-NEXT:  Imported 4 global variables for Module [[M1]]

>From 805923adc45e086c8bbac87d07c77ec8f6b83c7e Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 3 Jul 2024 09:42:20 -0700
Subject: [PATCH 3/3] simplify numGlobalVarSummaries

---
 llvm/lib/Transforms/IPO/FunctionImport.cpp | 30 ++++++++--------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index b36fd210b28cd..288ee313bf10c 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -995,26 +995,19 @@ static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
   return false;
 }
 
-// Return the number of global summaries and record the number of function
-// summaries as output parameter.
-static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index,
-                                      FunctionImporter::ExportSetTy &ExportSet,
-                                      unsigned &DefinedFS) {
+// Return the number of global summaries in ExportSet.
+static unsigned
+numGlobalVarSummaries(const ModuleSummaryIndex &Index,
+                      FunctionImporter::ExportSetTy &ExportSet) {
   unsigned NumGVS = 0;
-  DefinedFS = 0;
-  for (auto &VI : ExportSet) {
+  for (auto &VI : ExportSet)
     if (isGlobalVarSummary(Index, VI.getGUID()))
       ++NumGVS;
-    else
-      ++DefinedFS;
-  }
   return NumGVS;
 }
 
-// Return the number of global summaries and record the number of function
-// summaries as output parameter. This is the same as `numGlobalVarSummaries`
-// except that it takes `FunctionImporter::FunctionsToImportTy` as input
-// parameter.
+// Given ImportMap, return the number of global summaries and record the number
+// of defined function summaries as output parameter.
 static unsigned
 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
                       FunctionImporter::FunctionsToImportTy &ImportMap,
@@ -1147,12 +1140,9 @@ void llvm::ComputeCrossModuleImport(
   for (auto &ModuleImports : ImportLists) {
     auto ModName = ModuleImports.first;
     auto &Exports = ExportLists[ModName];
-    unsigned DefinedFS = 0;
-    unsigned NumGVS = numGlobalVarSummaries(Index, Exports, DefinedFS);
-    LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports " << DefinedFS
-                      << " function as definitions, "
-                      << Exports.size() - NumGVS - DefinedFS
-                      << " functions as declarations and " << NumGVS
+    unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
+    LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
+                      << Exports.size() - NumGVS << " functions and " << NumGVS
                       << " vars. Imports from " << ModuleImports.second.size()
                       << " modules.\n");
     for (auto &Src : ModuleImports.second) {



More information about the llvm-commits mailing list