[llvm] r267317 - Add an internalization step to the ThinLTOCodeGenerator

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 23 20:18:02 PDT 2016


Author: mehdi_amini
Date: Sat Apr 23 22:18:01 2016
New Revision: 267317

URL: http://llvm.org/viewvc/llvm-project?rev=267317&view=rev
Log:
Add an internalization step to the ThinLTOCodeGenerator

Keeping as many symbols as possible internal/private is
known to help the optimizer. Let's try to benefit from
this in ThinLTO.
Note: this is early work, but it is enough to build clang (and
all the LLVM tools). I still need to write some lit-tests...

Differential Revision: http://reviews.llvm.org/D19103

From: Mehdi Amini <mehdi.amini at apple.com>

Added:
    llvm/trunk/test/ThinLTO/X86/internalize.ll
Modified:
    llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h
    llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp
    llvm/trunk/tools/llvm-lto/llvm-lto.cpp

Modified: llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h (original)
+++ llvm/trunk/include/llvm/LTO/ThinLTOCodeGenerator.h Sat Apr 23 22:18:01 2016
@@ -201,6 +201,11 @@ public:
   void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
 
   /**
+   * Perform internalization.
+   */
+  void internalize(Module &Module, ModuleSummaryIndex &Index);
+
+  /**
    * Perform post-importing ThinLTO optimizations.
    */
   void optimize(Module &Module);

Modified: llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp (original)
+++ llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp Sat Apr 23 22:18:01 2016
@@ -17,6 +17,8 @@
 #ifdef HAVE_LLVM_REVISION
 #include "LLVMLTORevision.h"
 #endif
+
+#include "UpdateCompilerUsed.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
@@ -32,6 +34,7 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/Linker/Linker.h"
 #include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/CachePruning.h"
@@ -44,6 +47,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
@@ -309,6 +313,77 @@ static void optimizeModule(Module &TheMo
   PM.run(TheModule);
 }
 
+// Create a DenseSet of GlobalValue to be used with the Internalizer.
+static DenseSet<const GlobalValue *> computePreservedSymbolsForModule(
+    Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+    const FunctionImporter::ExportSetTy &ExportList) {
+  DenseSet<const GlobalValue *> PreservedGV;
+  if (GUIDPreservedSymbols.empty())
+    // Early exit: internalize is disabled when there is nothing to preserve.
+    return PreservedGV;
+
+  auto AddPreserveGV = [&](const GlobalValue &GV) {
+    auto GUID = GV.getGUID();
+    if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID))
+      PreservedGV.insert(&GV);
+  };
+
+  for (auto &GV : TheModule)
+    AddPreserveGV(GV);
+  for (auto &GV : TheModule.globals())
+    AddPreserveGV(GV);
+  for (auto &GV : TheModule.aliases())
+    AddPreserveGV(GV);
+
+  return PreservedGV;
+}
+
+// Run internalization on \p TheModule
+static void
+doInternalizeModule(Module &TheModule, const TargetMachine &TM,
+                    const DenseSet<const GlobalValue *> &PreservedGV) {
+  if (PreservedGV.empty()) {
+    // Be friendly and don't nuke totally the module when the client didn't
+    // supply anything to preserve.
+    return;
+  }
+
+  // Parse inline ASM and collect the list of symbols that are not defined in
+  // the current module.
+  StringSet<> AsmUndefinedRefs;
+  object::IRObjectFile::CollectAsmUndefinedRefs(
+      Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+      [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+        if (Flags & object::BasicSymbolRef::SF_Undefined)
+          AsmUndefinedRefs.insert(Name);
+      });
+
+  // Update the llvm.compiler_used globals to force preserving libcalls and
+  // symbols referenced from asm
+  UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs);
+
+  // Declare a callback for the internalize pass that will ask for every
+  // candidate GlobalValue if it can be internalized or not.
+  auto MustPreserveGV =
+      [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); };
+
+  llvm::internalizeModule(TheModule, MustPreserveGV);
+}
+
+// Convert the PreservedSymbols map from "Name" based to "GUID" based.
+static DenseSet<GlobalValue::GUID>
+computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
+                            const Triple &TheTriple) {
+  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
+  for (auto &Entry : PreservedSymbols) {
+    StringRef Name = Entry.first();
+    if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
+      Name = Name.drop_front();
+    GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
+  }
+  return GUIDPreservedSymbols;
+}
+
 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
                                             TargetMachine &TM) {
   SmallVector<char, 128> OutputBuffer;
@@ -395,6 +470,9 @@ public:
     sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
   }
 
+  // Access the path to this entry in the cache.
+  StringRef getEntryPath() { return EntryPath; }
+
   // Try loading the buffer for this cache entry.
   ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
     if (EntryPath.empty())
@@ -429,6 +507,8 @@ static std::unique_ptr<MemoryBuffer> Pro
     Module &TheModule, const ModuleSummaryIndex &Index,
     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
     const FunctionImporter::ImportMapTy &ImportList,
+    const FunctionImporter::ExportSetTy &ExportList,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
     std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
     ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
     StringRef SaveTempsDir, unsigned count) {
@@ -436,6 +516,13 @@ static std::unique_ptr<MemoryBuffer> Pro
   // Save temps: after IPO.
   saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
 
+  // Prepare for internalization by computing the set of symbols to preserve.
+  // We need to compute the list of symbols to preserve during internalization
+  // before doing any promotion because after renaming we won't (easily) match
+  // to the original name.
+  auto PreservedGV = computePreservedSymbolsForModule(
+      TheModule, GUIDPreservedSymbols, ExportList);
+
   // "Benchmark"-like optimization: single-source case
   bool SingleModule = (ModuleMap.size() == 1);
 
@@ -449,16 +536,24 @@ static std::unique_ptr<MemoryBuffer> Pro
 
     // Save temps: after promotion.
     saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
+  }
 
+  // Internalization
+  doInternalizeModule(TheModule, TM, PreservedGV);
+
+  // Save internalized bitcode
+  saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc");
+
+  if (!SingleModule) {
     crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
 
     // Save temps: after cross-module import.
-    saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
+    saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc");
   }
 
   optimizeModule(TheModule, TM);
 
-  saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
+  saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc");
 
   if (DisableCodeGen) {
     // Configured to stop before CodeGen, serialize the bitcode and return.
@@ -516,7 +611,10 @@ void ThinLTOCodeGenerator::preserveSymbo
 }
 
 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
-  CrossReferencedSymbols.insert(Name);
+  // FIXME: At the moment, we don't take advantage of this extra information,
+  // we're conservatively considering cross-references as preserved.
+  //  CrossReferencedSymbols.insert(Name);
+  PreservedSymbols.insert(Name);
 }
 
 // TargetMachine factory
@@ -620,10 +718,43 @@ void ThinLTOCodeGenerator::crossModuleIm
 }
 
 /**
+ * Perform internalization.
+ */
+void ThinLTOCodeGenerator::internalize(Module &TheModule,
+                                       ModuleSummaryIndex &Index) {
+  initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
+  auto ModuleCount = Index.modulePaths().size();
+  auto ModuleIdentifier = TheModule.getModuleIdentifier();
+
+  // Convert the preserved symbols set from string to GUID
+  auto GUIDPreservedSymbols =
+      computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+
+  // Collect for each module the list of function it defines (GUID -> Summary).
+  StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+      ModuleToDefinedGVSummaries(ModuleCount);
+  Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
+
+  // Generate import/export list
+  StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
+  StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
+  ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+                           ExportLists);
+  auto &ExportList = ExportLists[ModuleIdentifier];
+
+  // Internalization
+  auto PreservedGV = computePreservedSymbolsForModule(
+      TheModule, GUIDPreservedSymbols, ExportList);
+  doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV);
+}
+
+/**
  * Perform post-importing ThinLTO optimizations.
  */
 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
+
+  // Optimize now
   optimizeModule(TheModule, *TMBuilder.create());
 }
 
@@ -694,10 +825,9 @@ void ThinLTOCodeGenerator::run() {
                            ExportLists);
 
   // Convert the preserved symbols set from string to GUID, this is needed for
-  // computing the caching.
-  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
-  for (auto &Entry : PreservedSymbols)
-    GUIDPreservedSymbols.insert(GlobalValue::getGUID(Entry.first()));
+  // computing the caching hash and the internalization.
+  auto GUIDPreservedSymbols =
+      computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
 
   // Parallel optimizer + codegen
   {
@@ -714,18 +844,21 @@ void ThinLTOCodeGenerator::run() {
         // We use a std::map here to be able to have a defined ordering when
         // producing a hash for the cache entry.
         std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
-        ResolveODR(*Index, ExportList, DefinedFunctions,
-                   ModuleIdentifier, ResolvedODR);
+        ResolveODR(*Index, ExportList, DefinedFunctions, ModuleIdentifier,
+                   ResolvedODR);
 
         // The module may be cached, this helps handling it.
-        ModuleCacheEntry CacheEntry(
-            CacheOptions.Path, *Index, ModuleBuffer.getBufferIdentifier(),
-            ImportLists[ModuleBuffer.getBufferIdentifier()],
-            ExportLists[ModuleBuffer.getBufferIdentifier()], ResolvedODR,
-            DefinedFunctions, GUIDPreservedSymbols);
+        ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
+                                    ImportLists[ModuleIdentifier], ExportList,
+                                    ResolvedODR, DefinedFunctions,
+                                    GUIDPreservedSymbols);
 
         {
           auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
+          DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
+                       << CacheEntry.getEntryPath() << "' for buffer " << count
+                       << " " << ModuleIdentifier << "\n");
+
           if (ErrOrBuffer) {
             // Cache Hit!
             ProducedBinaries[count] = std::move(ErrOrBuffer.get());
@@ -741,14 +874,14 @@ void ThinLTOCodeGenerator::run() {
         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
 
         // Save temps: original file.
-        if (!SaveTempsDir.empty()) {
-          saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
-        }
+        saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
 
         auto &ImportList = ImportLists[ModuleIdentifier];
+        // Run the main process now, and generates a binary
         auto OutputBuffer = ProcessThinLTOModule(
             *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
-            ResolvedODR, CacheOptions, DisableCodeGen, SaveTempsDir, count);
+            ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions,
+            DisableCodeGen, SaveTempsDir, count);
 
         CacheEntry.write(*OutputBuffer);
         ProducedBinaries[count] = std::move(OutputBuffer);

Added: llvm/trunk/test/ThinLTO/X86/internalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/ThinLTO/X86/internalize.ll?rev=267317&view=auto
==============================================================================
--- llvm/trunk/test/ThinLTO/X86/internalize.ll (added)
+++ llvm/trunk/test/ThinLTO/X86/internalize.ll Sat Apr 23 22:18:01 2016
@@ -0,0 +1,19 @@
+;; RUN: opt -module-summary %s -o %t1.bc
+; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc
+; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=REGULAR
+; RUN: llvm-lto -thinlto-action=internalize -thinlto-index %t.index.bc %t1.bc -o -  --exported-symbol=foo | llvm-dis -o - | FileCheck %s --check-prefix=INTERNALIZE
+
+; REGULAR: define void @foo
+; REGULAR: define void @bar
+; INTERNALIZE: define void @foo
+; INTERNALIZE: define internal void @bar
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @foo() {
+    ret void
+}
+define void @bar() {
+    ret void
+}
\ No newline at end of file

Modified: llvm/trunk/tools/llvm-lto/llvm-lto.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-lto/llvm-lto.cpp?rev=267317&r1=267316&r2=267317&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-lto/llvm-lto.cpp (original)
+++ llvm/trunk/tools/llvm-lto/llvm-lto.cpp Sat Apr 23 22:18:01 2016
@@ -68,6 +68,7 @@ enum ThinLTOModes {
   THINLINK,
   THINPROMOTE,
   THINIMPORT,
+  THININTERNALIZE,
   THINOPT,
   THINCODEGEN,
   THINALL
@@ -84,6 +85,9 @@ cl::opt<ThinLTOModes> ThinLTOMode(
         clEnumValN(THINIMPORT, "import", "Perform both promotion and "
                                          "cross-module importing (requires "
                                          "-thinlto-index)."),
+        clEnumValN(THININTERNALIZE, "internalize",
+                   "Perform internalization driven by -exported-symbol "
+                   "(requires -thinlto-index)."),
         clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
         clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
         clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
@@ -105,10 +109,10 @@ static cl::opt<std::string> OutputFilena
                                            cl::desc("Override output filename"),
                                            cl::value_desc("filename"));
 
-static cl::list<std::string>
-    ExportedSymbols("exported-symbol",
-                    cl::desc("Symbol to export from the resulting object file"),
-                    cl::ZeroOrMore);
+static cl::list<std::string> ExportedSymbols(
+    "exported-symbol",
+    cl::desc("List of symbols to export from the resulting object file"),
+    cl::ZeroOrMore);
 
 static cl::list<std::string>
     DSOSymbols("dso-symbol",
@@ -329,6 +333,10 @@ public:
   ThinLTOProcessing(const TargetOptions &Options) {
     ThinGenerator.setCodePICModel(RelocModel);
     ThinGenerator.setTargetOptions(Options);
+
+    // Add all the exported symbols to the table of symbols to preserve.
+    for (unsigned i = 0; i < ExportedSymbols.size(); ++i)
+      ThinGenerator.preserveSymbol(ExportedSymbols[i]);
   }
 
   void run() {
@@ -339,6 +347,8 @@ public:
       return promote();
     case THINIMPORT:
       return import();
+    case THININTERNALIZE:
+      return internalize();
     case THINOPT:
       return optimize();
     case THINCODEGEN:
@@ -430,6 +440,37 @@ private:
       }
       writeModuleToFile(*TheModule, OutputName);
     }
+  }
+
+  void internalize() {
+    if (InputFilenames.size() != 1 && !OutputFilename.empty())
+      report_fatal_error("Can't handle a single output filename and multiple "
+                         "input files, do not provide an output filename and "
+                         "the output files will be suffixed from the input "
+                         "ones.");
+
+    if (ExportedSymbols.empty())
+      errs() << "Warning: -internalize will not perform without "
+                "-exported-symbol\n";
+
+    auto Index = loadCombinedIndex();
+    auto InputBuffers = loadAllFilesForIndex(*Index);
+    for (auto &MemBuffer : InputBuffers)
+      ThinGenerator.addModule(MemBuffer->getBufferIdentifier(),
+                              MemBuffer->getBuffer());
+
+    for (auto &Filename : InputFilenames) {
+      LLVMContext Ctx;
+      auto TheModule = loadModule(Filename, Ctx);
+
+      ThinGenerator.internalize(*TheModule, *Index);
+
+      std::string OutputName = OutputFilename;
+      if (OutputName.empty()) {
+        OutputName = Filename + ".thinlto.internalized.bc";
+      }
+      writeModuleToFile(*TheModule, OutputName);
+    }
   }
 
   void optimize() {




More information about the llvm-commits mailing list