[llvm] bacb45e - [ThinLTO] Make the cache key independent of the module identifier paths

Argyrios Kyrtzidis via llvm-commits llvm-commits at lists.llvm.org
Mon May 22 21:45:34 PDT 2023


Author: Argyrios Kyrtzidis
Date: 2023-05-22T21:41:01-07:00
New Revision: bacb45eb9a776c032f69d55a561fa450ba545c57

URL: https://github.com/llvm/llvm-project/commit/bacb45eb9a776c032f69d55a561fa450ba545c57
DIFF: https://github.com/llvm/llvm-project/commit/bacb45eb9a776c032f69d55a561fa450ba545c57.diff

LOG: [ThinLTO] Make the cache key independent of the module identifier paths

Otherwise there are cache misses just from changing the name of a path, even though the input modules did not change.

rdar://109672225

Differential Revision: https://reviews.llvm.org/D151165

Added: 
    llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll

Modified: 
    llvm/include/llvm/IR/ModuleSummaryIndex.h
    llvm/lib/LTO/LTO.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 6c6ed170cb049..64cb2b5a01643 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1728,6 +1728,13 @@ class ModuleSummaryIndex {
     return &*It;
   }
 
+  /// Return module entry for module with the given \p ModPath.
+  const ModuleInfo *getModule(StringRef ModPath) const {
+    auto It = ModulePathStringTable.find(ModPath);
+    assert(It != ModulePathStringTable.end() && "Module not registered");
+    return &*It;
+  }
+
   /// Check if the given Module has any functions available for exporting
   /// in the index. We consider any module present in the ModulePathStringTable
   /// to have exported functions.

diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index de9b8f1da62be..2f52a20e5b8b4 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -174,22 +174,38 @@ void llvm::computeLTOCacheKey(
   // imported symbols for each module may affect code generation and is
   // sensitive to link order, so include that as well.
   using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
-  std::vector<ImportMapIteratorTy> ImportModulesVector;
+  struct ImportModule {
+    ImportMapIteratorTy ModIt;
+    const ModuleSummaryIndex::ModuleInfo *ModInfo;
+
+    StringRef getIdentifier() const { return ModIt->getKey(); }
+    const FunctionImporter::FunctionsToImportTy &getFunctions() const {
+      return ModIt->second;
+    }
+
+    const ModuleHash &getHash() const { return ModInfo->second.second; }
+    uint64_t getId() const { return ModInfo->second.first; }
+  };
+
+  std::vector<ImportModule> ImportModulesVector;
   ImportModulesVector.reserve(ImportList.size());
 
   for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
        ++It) {
-    ImportModulesVector.push_back(It);
+    ImportModulesVector.push_back({It, Index.getModule(It->getKey())});
   }
+  // Order using moduleId integer which is based on the order the module was
+  // added.
   llvm::sort(ImportModulesVector,
-             [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
-                 -> bool { return Lhs->getKey() < Rhs->getKey(); });
-  for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
-    auto ModHash = Index.getModuleHash(EntryIt->first());
+             [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
+               return Lhs.getId() < Rhs.getId();
+             });
+  for (const ImportModule &Entry : ImportModulesVector) {
+    auto ModHash = Entry.getHash();
     Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
 
-    AddUint64(EntryIt->second.size());
-    for (auto &Fn : EntryIt->second)
+    AddUint64(Entry.getFunctions().size());
+    for (auto &Fn : Entry.getFunctions())
       AddUint64(Fn);
   }
 
@@ -259,9 +275,10 @@ void llvm::computeLTOCacheKey(
 
   // Imported functions may introduce new uses of type identifier resolutions,
   // so we need to collect their used resolutions as well.
-  for (auto &ImpM : ImportList)
-    for (auto &ImpF : ImpM.second) {
-      GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first());
+  for (const ImportModule &ImpM : ImportModulesVector)
+    for (auto &ImpF : ImpM.getFunctions()) {
+      GlobalValueSummary *S =
+          Index.findSummaryInModule(ImpF, ImpM.getIdentifier());
       AddUsedThings(S);
       // If this is an alias, we also care about any types/etc. that the aliasee
       // may reference.

diff  --git a/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll b/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll
new file mode 100644
index 0000000000000..3b5b75e876de9
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/cache-decoupled-from-filenames.ll
@@ -0,0 +1,27 @@
+; RUN: rm -rf %t && mkdir -p %t/1 %t/2 %t/3 %t/4
+; RUN: opt -module-hash -module-summary %s -o %t/t.bc
+; RUN: opt -module-hash -module-summary %S/Inputs/cache-import-lists1.ll -o %t/1/a.bc
+; RUN: opt -module-hash -module-summary %S/Inputs/cache-import-lists2.ll -o %t/2/b.bc
+
+; Tests that the hash for t is insensitive to the bitcode module filenames.
+
+; RUN: rm -rf %t/cache
+; RUN: llvm-lto2 run -cache-dir %t/cache -o %t.o %t/t.bc %t/1/a.bc %t/2/b.bc -r=%t/t.bc,main,plx -r=%t/t.bc,f1,lx -r=%t/t.bc,f2,lx -r=%t/1/a.bc,f1,plx -r=%t/1/a.bc,linkonce_odr,plx -r=%t/2/b.bc,f2,plx -r=%t/2/b.bc,linkonce_odr,lx
+; RUN: ls %t/cache | count 3
+
+; RUN: cp %t/1/a.bc %t/4/d.bc
+; RUN: cp %t/2/b.bc %t/3/k.bc
+; RUN: llvm-lto2 run -cache-dir %t/cache -o %t.o %t/t.bc %t/4/d.bc %t/3/k.bc -r=%t/t.bc,main,plx -r=%t/t.bc,f1,lx -r=%t/t.bc,f2,lx -r=%t/4/d.bc,f1,plx -r=%t/4/d.bc,linkonce_odr,plx -r=%t/3/k.bc,f2,plx -r=%t/3/k.bc,linkonce_odr,lx
+; RUN: ls %t/cache | count 3
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @main() {
+  call void @f1()
+  call void @f2()
+  ret void
+}
+
+declare void @f1()
+declare void @f2()


        


More information about the llvm-commits mailing list