[llvm] r266959 - ThinLTO: add module caching handling.

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 20 22:54:23 PDT 2016


Author: mehdi_amini
Date: Thu Apr 21 00:54:23 2016
New Revision: 266959

URL: http://llvm.org/viewvc/llvm-project?rev=266959&view=rev
Log:
ThinLTO: add module caching handling.

Differential Revision: http://reviews.llvm.org/D18494

From: Mehdi Amini <mehdi.amini at apple.com>

Modified:
    llvm/trunk/include/llvm/ADT/StringExtras.h
    llvm/trunk/include/llvm/Support/SHA1.h
    llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp

Modified: llvm/trunk/include/llvm/ADT/StringExtras.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringExtras.h?rev=266959&r1=266958&r2=266959&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringExtras.h (original)
+++ llvm/trunk/include/llvm/ADT/StringExtras.h Thu Apr 21 00:54:23 2016
@@ -59,6 +59,22 @@ static inline std::string utohexstr(uint
   return std::string(BufPtr, std::end(Buffer));
 }
 
+/// Convert each byte of \p Input to two uppercase hexadecimal digits, high
+/// nibble first. The returned string is double the size of \p Input.
+static inline std::string toHex(StringRef Input) {
+  static const char *const LUT = "0123456789ABCDEF";
+  size_t Length = Input.size();
+
+  std::string Output;
+  Output.reserve(2 * Length); // Two output characters per input byte.
+  for (size_t i = 0; i < Length; ++i) {
+    const unsigned char c = Input[i]; // Unsigned, so c >> 4 is at most 15.
+    Output.push_back(LUT[c >> 4]); // High nibble.
+    Output.push_back(LUT[c & 15]); // Low nibble.
+  }
+  return Output;
+}
+
 static inline std::string utostr(uint64_t X, bool isNeg = false) {
   char Buffer[21];
   char *BufPtr = std::end(Buffer);

Modified: llvm/trunk/include/llvm/Support/SHA1.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SHA1.h?rev=266959&r1=266958&r2=266959&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/SHA1.h (original)
+++ llvm/trunk/include/llvm/Support/SHA1.h Thu Apr 21 00:54:23 2016
@@ -16,6 +16,8 @@
 #ifndef LLVM_SUPPORT_SHA1_H
 #define LLVM_SUPPORT_SHA1_H
 
+#include "llvm/ADT/ArrayRef.h"
+
 #include <cstdint>
 
 namespace llvm {
@@ -33,6 +35,11 @@ public:
   /// Digest more data.
   void update(ArrayRef<uint8_t> Data);
 
+  /// Digest the bytes of \p Str, forwarding to the ArrayRef<uint8_t> overload.
+  void update(StringRef Str) {
+    update(ArrayRef<uint8_t>((uint8_t *)Str.data(), Str.size()));
+  }
+
   /// Return a reference to the current raw 160-bits SHA1 for the digested data
   /// since the last call to init(). This call will add data to the internal
   /// state and as such is not suited for getting an intermediate result

Modified: llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp?rev=266959&r1=266958&r2=266959&view=diff
==============================================================================
--- llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp (original)
+++ llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp Thu Apr 21 00:54:23 2016
@@ -14,6 +14,9 @@
 
 #include "llvm/LTO/ThinLTOCodeGenerator.h"
 
+#ifdef HAVE_LLVM_REVISION
+#include "LLVMLTORevision.h"
+#endif
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
@@ -31,6 +34,10 @@
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/CachePruning.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ThreadPool.h"
@@ -176,7 +183,7 @@ static void ResolveODR(
     const FunctionImporter::ExportSetTy &ExportList,
     const std::map<GlobalValue::GUID, GlobalValueSummary *> &DefinedGlobals,
     StringRef ModuleIdentifier,
-    DenseMap<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
+    std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
   if (Index.modulePaths().size() == 1)
     // Nothing to do if we don't have multiple modules
     return;
@@ -204,7 +211,7 @@ static void ResolveODR(
 /// Fixup linkage, see ResolveODR() above.
 void fixupODR(
     Module &TheModule,
-    const DenseMap<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
+    const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
   // Process functions and global now
   for (auto &GV : TheModule) {
     auto NewLinkage = ResolvedODR.find(GV.getGUID());
@@ -326,11 +333,103 @@ std::unique_ptr<MemoryBuffer> codegenMod
   return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
 }
 
+/// Manage the on-disk cache entry for the object file of a single Module.
+class ModuleCacheEntry {
+  SmallString<128> EntryPath; // Left empty when CachePath is empty.
+
+public:
+  // Create a cache entry. This computes a hash for the Module, taking the
+  // current import/export lists into account, and uses its hexadecimal form
+  // as the entry's file name under CachePath (no-op if CachePath is empty).
+  ModuleCacheEntry(
+      StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      const std::map<GlobalValue::GUID, GlobalValueSummary *> &DefinedFunctions,
+      const DenseSet<GlobalValue::GUID> &PreservedSymbols) {
+    if (CachePath.empty())
+      return;
+
+    // Compute the unique hash for this entry.
+    // This is based on the current compiler version, the module itself, the
+    // export list, the hash for every single module in the import list, the
+    // list of ResolvedODR for the module, and the list of preserved symbols.
+
+    SHA1 Hasher;
+
+    // Start with the compiler version, plus the revision when it is available.
+    Hasher.update(LLVM_VERSION_STRING);
+#ifdef HAVE_LLVM_REVISION
+    Hasher.update(LLVM_REVISION);
+#endif
+
+    // Include the hash for the current module.
+    auto ModHash = Index.getModuleHash(ModuleID);
+    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+    for (auto F : ExportList)
+      // The export list can impact internalization, so be conservative here.
+      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
+
+    // Include the hash for every module we import functions from.
+    for (auto &Entry : ImportList) {
+      auto ModHash = Index.getModuleHash(Entry.first());
+      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+    }
+
+    // Include each resolved ODR entry: the GUID first, then its linkage.
+    for (auto &Entry : ResolvedODR) {
+      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&Entry.first,
+                                      sizeof(GlobalValue::GUID)));
+      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&Entry.second,
+                                      sizeof(GlobalValue::LinkageTypes)));
+    }
+
+    // Include the preserved symbols that are present in DefinedFunctions.
+    for (auto &Entry : PreservedSymbols) {
+      if (DefinedFunctions.count(Entry))
+        Hasher.update(
+            ArrayRef<uint8_t>((uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
+    }
+
+    sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
+  }
+
+  // Try loading the buffer for this cache entry from the file at EntryPath.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
+    if (EntryPath.empty())
+      return std::error_code();
+    return MemoryBuffer::getFile(EntryPath);
+  }
+
+  // Cache the produced object file at EntryPath (no-op if EntryPath is empty).
+  void write(MemoryBufferRef OutputBuffer) {
+    if (EntryPath.empty())
+      return;
+
+    // Write to a temporary file first to avoid a race condition.
+    SmallString<128> TempFilename;
+    int TempFD;
+    std::error_code EC =
+        sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
+    if (EC) {
+      errs() << "Error: " << EC.message() << "\n";
+      report_fatal_error("ThinLTO: Can't get a temporary file");
+    }
+    {
+      raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
+      OS << OutputBuffer.getBuffer();
+    }
+    // Rename to final destination (hopefully race condition won't matter here)
+    sys::fs::rename(TempFilename, EntryPath); // NOTE(review): result ignored.
+  }
+};
+
 static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
     Module &TheModule, const ModuleSummaryIndex &Index,
     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
     const FunctionImporter::ImportMapTy &ImportList,
-    DenseMap<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+    std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
     ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
     StringRef SaveTempsDir, unsigned count) {
 
@@ -487,7 +586,9 @@ void ThinLTOCodeGenerator::promote(Modul
   // Resolve the LinkOnceODR, trying to turn them into "available_externally"
   // where possible.
   // This is a compile-time optimization.
-  DenseMap<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
+  // We use a std::map here to be able to have a defined ordering when
+  // producing a hash for the cache entry.
+  std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
   ResolveODR(Index, ExportList, ModuleToDefinedGVSummaries[ModuleIdentifier],
              ModuleIdentifier, ResolvedODR);
   fixupODR(TheModule, ResolvedODR);
@@ -592,23 +693,50 @@ void ThinLTOCodeGenerator::run() {
   ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
                            ExportLists);
 
+  // Convert the preserved symbols set from string to GUID, this is needed for
+  // computing the caching.
+  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
+  for (auto &Entry : PreservedSymbols)
+    GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first()));
+
   // Parallel optimizer + codegen
   {
     ThreadPool Pool(ThreadCount);
     int count = 0;
     for (auto &ModuleBuffer : Modules) {
       Pool.async([&](int count) {
-        LLVMContext Context;
-        Context.setDiscardValueNames(LTODiscardValueNames);
-        Context.enableDebugTypeODRUniquing();
         auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
         auto &ExportList = ExportLists[ModuleIdentifier];
 
-        DenseMap<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
-        ResolveODR(*Index, ExportList,
-                   ModuleToDefinedGVSummaries[ModuleIdentifier],
+        auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
+
+        // Resolve ODR, this has to be done early because it impacts the caching
+        // We use a std::map here to be able to have a defined ordering when
+        // producing a hash for the cache entry.
+        std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
+        ResolveODR(*Index, ExportList, DefinedFunctions,
                    ModuleIdentifier, ResolvedODR);
 
+        // The module may be cached, this helps handling it.
+        ModuleCacheEntry CacheEntry(
+            CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(),
+            ImportLists[ModuleBuffer.getBufferIdentifier()],
+            ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR,
+            DefinedFunctions, GUIDPreservedSymbols);
+
+        {
+          auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
+          if (ErrOrBuffer) {
+            // Cache Hit!
+            ProducedBinaries[count] = std::move(ErrOrBuffer.get());
+            return;
+          }
+        }
+
+        LLVMContext Context;
+        Context.setDiscardValueNames(LTODiscardValueNames);
+        Context.enableDebugTypeODRUniquing();
+
         // Parse module now
         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
 
@@ -618,14 +746,23 @@ void ThinLTOCodeGenerator::run() {
         }
 
         auto &ImportList = ImportLists[ModuleIdentifier];
-        ProducedBinaries[count] = ProcessThinLTOModule(
+        auto OutputBuffer = ProcessThinLTOModule(
             *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
             ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count);
+
+        CacheEntry.write(*OutputBuffer);
+        ProducedBinaries[count] = std::move(OutputBuffer);
       }, count);
       count++;
     }
   }
 
+  CachePruning(CacheOptions.Path)
+      .setPruningInterval(CacheOptions.PruningInterval)
+      .setEntryExpiration(CacheOptions.Expiration)
+      .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
+      .prune();
+
   // If statistics were requested, print them out now.
   if (llvm::AreStatisticsEnabled())
     llvm::PrintStatistics();




More information about the llvm-commits mailing list