[llvm] r262724 - [ThinLTO] Launch importing backends in parallel threads from gold plugin

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 4 09:06:03 PST 2016


Author: tejohnson
Date: Fri Mar  4 11:06:02 2016
New Revision: 262724

URL: http://llvm.org/viewvc/llvm-project?rev=262724&view=rev
Log:
[ThinLTO] Launch importing backends in parallel threads from gold plugin

Summary:
Launch ThinLTO backends (LTO and codegen pipelines with importing) in
parallel using a ThreadPool, after creating the combined index.
The number of threads is controlled by the existing -jobs gold plugin
option, or the hardware concurrency if not specified.

The old behavior of exiting after creating the combined index can be
invoked via a new thinlto-index-only plugin option.

This commit involves just the ThinLTO-specific pieces of D15390; the NFC
and other restructuring pieces were committed independently:
  r262677: Add hardware_concurrency interface to llvm::thread (NFC)
  r262719: Change split code gen to use ThreadPool
  r262721: Refactor gold-plugin codegen to prepare for ThinLTO threads (NFC)

Reviewers: pcc, joker.eph, rafael

Subscribers: rafael, davidxl, llvm-commits, joker.eph

Differential Revision: http://reviews.llvm.org/D15390

Added:
    llvm/trunk/test/tools/gold/X86/pr19901_thinlto.ll
Modified:
    llvm/trunk/test/tools/gold/X86/thinlto.ll
    llvm/trunk/tools/gold/gold-plugin.cpp

Added: llvm/trunk/test/tools/gold/X86/pr19901_thinlto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/pr19901_thinlto.ll?rev=262724&view=auto
==============================================================================
--- llvm/trunk/test/tools/gold/X86/pr19901_thinlto.ll (added)
+++ llvm/trunk/test/tools/gold/X86/pr19901_thinlto.ll Fri Mar  4 11:06:02 2016
@@ -0,0 +1,25 @@
+; RUN: llc %s -o %t.o -filetype=obj -relocation-model=pic
+; RUN: llvm-as -function-summary %p/Inputs/pr19901-1.ll -o %t2.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     -shared -m elf_x86_64 -o %t.so %t2.o %t.o
+; RUN: llvm-readobj -t %t.so | FileCheck %s
+
+; CHECK:       Symbol {
+; CHECK:         Name: f
+; CHECK-NEXT:    Value:
+; CHECK-NEXT:    Size:
+; CHECK-NEXT:    Binding: Local
+; CHECK-NEXT:    Type: Function
+; CHECK-NEXT:    Other: {{2|0}}
+; CHECK-NEXT:    Section: .text
+; CHECK-NEXT:  }
+
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @g() {
+  call void @f()
+  ret i32 0
+}
+define linkonce_odr hidden void @f() {
+  ret void
+}

Modified: llvm/trunk/test/tools/gold/X86/thinlto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/thinlto.ll?rev=262724&r1=262723&r2=262724&view=diff
==============================================================================
--- llvm/trunk/test/tools/gold/X86/thinlto.ll (original)
+++ llvm/trunk/test/tools/gold/X86/thinlto.ll Fri Mar  4 11:06:02 2016
@@ -4,17 +4,55 @@
 ; RUN: llvm-as %p/Inputs/thinlto.ll -o %t2.o
 ; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
 ; RUN:    --plugin-opt=thinlto \
+; RUN:    --plugin-opt=thinlto-index-only \
 ; RUN:    -shared %t.o %t2.o -o %t3
+; RUN: not test -e %t3
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=thinlto \
+; RUN:    -shared %t.o %t2.o -o %t4
+; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM
 
+; Next generate function summary sections and test gold handling.
 ; RUN: llvm-as -function-summary %s -o %t.o
 ; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o
 
+; Ensure gold generates an index and not a binary if requested.
 ; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
 ; RUN:    --plugin-opt=thinlto \
+; RUN:    --plugin-opt=thinlto-index-only \
 ; RUN:    -shared %t.o %t2.o -o %t3
 ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
 ; RUN: not test -e %t3
 
+; Ensure gold generates an index as well as a binary by default in ThinLTO mode.
+; First force single-threaded mode
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=thinlto \
+; RUN:    --plugin-opt=jobs=1 \
+; RUN:    -shared %t.o %t2.o -o %t4
+; RUN: llvm-bcanalyzer -dump %t4.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM
+
+; Next force multi-threaded mode
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=thinlto \
+; RUN:    --plugin-opt=jobs=2 \
+; RUN:    -shared %t.o %t2.o -o %t4
+; RUN: llvm-bcanalyzer -dump %t4.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM
+
+; Test --plugin-opt=obj-path to ensure unique object files generated.
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=thinlto \
+; RUN:    --plugin-opt=jobs=2 \
+; RUN:    --plugin-opt=obj-path=%t5.o \
+; RUN:    -shared %t.o %t2.o -o %t4
+; RUN: llvm-nm %t5.o0 | FileCheck %s --check-prefix=NM2
+; RUN: llvm-nm %t5.o1 | FileCheck %s --check-prefix=NM2
+
+; NM: T f
+; NM2: T {{f|g}}
+
 ; COMBINED: <MODULE_STRTAB_BLOCK
 ; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
 ; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
@@ -30,7 +68,10 @@
 ; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
 ; COMBINED-NEXT: </VALUE_SYMTAB
 
+declare void @g(...)
+
 define void @f() {
 entry:
+  call void (...) @g()
   ret void
 }

Modified: llvm/trunk/tools/gold/gold-plugin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/gold/gold-plugin.cpp?rev=262724&r1=262723&r2=262724&view=diff
==============================================================================
--- llvm/trunk/tools/gold/gold-plugin.cpp (original)
+++ llvm/trunk/tools/gold/gold-plugin.cpp Fri Mar  4 11:06:02 2016
@@ -38,9 +38,12 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/thread.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
@@ -76,17 +79,25 @@ struct claimed_file {
 /// RAII wrapper to manage opening and releasing of a ld_plugin_input_file.
 struct PluginInputFile {
   void *Handle;
-  ld_plugin_input_file File;
+  std::unique_ptr<ld_plugin_input_file> File;
 
   PluginInputFile(void *Handle) : Handle(Handle) {
-    if (get_input_file(Handle, &File) != LDPS_OK)
+    File = llvm::make_unique<ld_plugin_input_file>();
+    if (get_input_file(Handle, File.get()) != LDPS_OK)
       message(LDPL_FATAL, "Failed to get file information");
   }
   ~PluginInputFile() {
-    if (release_input_file(Handle) != LDPS_OK)
-      message(LDPL_FATAL, "Failed to release file information");
+    // File would have been reset to nullptr if we moved this object
+    // to a new owner.
+    if (File)
+      if (release_input_file(Handle) != LDPS_OK)
+        message(LDPL_FATAL, "Failed to release file information");
   }
-  ld_plugin_input_file &file() { return File; }
+
+  ld_plugin_input_file &file() { return *File; }
+
+  PluginInputFile(PluginInputFile &&RHS) = default;
+  PluginInputFile &operator=(PluginInputFile &&RHS) = default;
 };
 
 struct ResolutionInfo {
@@ -99,6 +110,33 @@ struct ResolutionInfo {
   unsigned CommonAlign = 0;
   claimed_file *CommonFile = nullptr;
 };
+
+/// Class to own information used by a task or during its cleanup for a
+/// ThinLTO backend instantiation.
+class ThinLTOTaskInfo {
+  /// The input file holding the module bitcode read by the ThinLTO task.
+  PluginInputFile InputFile;
+
+  /// The output stream the task will codegen into.
+  std::unique_ptr<raw_fd_ostream> OS;
+
+  /// The file name corresponding to the output stream, used during cleanup.
+  std::string Filename;
+
+  /// Flag indicating whether the output file is a temp file that must be
+  /// added to the cleanup list during cleanup.
+  bool TempOutFile;
+
+public:
+  ThinLTOTaskInfo(PluginInputFile InputFile, std::unique_ptr<raw_fd_ostream> OS,
+                  std::string Filename, bool TempOutFile)
+      : InputFile(std::move(InputFile)), OS(std::move(OS)), Filename(Filename),
+        TempOutFile(TempOutFile) {}
+
+  /// Performs task related cleanup activities that must be done
+  /// single-threaded (i.e. call backs to gold).
+  void cleanup();
+};
 }
 
 static ld_plugin_add_symbols add_symbols = nullptr;
@@ -126,7 +164,8 @@ namespace options {
   static unsigned OptLevel = 2;
   // Default parallelism of 0 used to indicate that user did not specify.
   // Actual parallelism default value depends on implementation.
-  // Currently, code generation defaults to no parallelism.
+  // Currently, code generation defaults to no parallelism, whereas
+  // ThinLTO uses the hardware_concurrency as the default.
   static unsigned Parallelism = 0;
 #ifdef NDEBUG
   static bool DisableVerify = true;
@@ -141,6 +180,11 @@ namespace options {
   // the information from intermediate files and write a combined
   // global index for the ThinLTO backends.
   static bool thinlto = false;
+  // If false, all ThinLTO backend compilations through code gen are performed
+  // using multiple threads in the gold-plugin, before handing control back to
+  // gold. If true, exit after creating the combined index; the assumption is
+  // that the build system will launch the backend processes.
+  static bool thinlto_index_only = false;
   // Additional options to pass into the code generator.
   // Note: This array will contain all plugin options which are not claimed
   // as plugin exclusive to pass to the code generator.
@@ -172,6 +216,8 @@ namespace options {
       TheOutputType = OT_DISABLE;
     } else if (opt == "thinlto") {
       thinlto = true;
+    } else if (opt == "thinlto-index-only") {
+      thinlto_index_only = true;
     } else if (opt.size() == 2 && opt[0] == 'O') {
       if (opt[1] < '0' || opt[1] > '3')
         message(LDPL_FATAL, "Optimization level must be between 0 and 3");
@@ -442,7 +488,7 @@ static ld_plugin_status claim_file_hook(
 
   // If we are doing ThinLTO compilation, don't need to process the symbols.
   // Later we simply build a combined index file after all files are claimed.
-  if (options::thinlto)
+  if (options::thinlto && options::thinlto_index_only)
     return LDPS_OK;
 
   for (auto &Sym : Obj->symbols()) {
@@ -761,24 +807,58 @@ static void recordFile(std::string Filen
     Cleanup.push_back(Filename.c_str());
 }
 
+void ThinLTOTaskInfo::cleanup() {
+  // Close the output file descriptor before we pass it to gold.
+  OS->close();
+
+  recordFile(Filename, TempOutFile);
+}
+
 namespace {
-/// Class to manage optimization and code generation for a module.
+/// Class to manage optimization and code generation for a module, possibly
+/// in a thread (ThinLTO).
 class CodeGen {
   /// The module for which this will generate code.
   std::unique_ptr<llvm::Module> M;
 
+  /// The output stream to generate code into.
+  raw_fd_ostream *OS;
+
+  /// The task ID when this was invoked in a thread (ThinLTO).
+  int TaskID;
+
+  /// The function index for ThinLTO tasks.
+  const FunctionInfoIndex *CombinedIndex;
+
   /// The target machine for generating code for this module.
   std::unique_ptr<TargetMachine> TM;
 
+  /// Filename to use as base when save-temps is enabled, used to get
+  /// a unique and identifiable save-temps output file for each ThinLTO backend.
+  std::string SaveTempsFilename;
+
 public:
   /// Constructor used by full LTO.
-  CodeGen(std::unique_ptr<llvm::Module> M) : M(std::move(M)) {
+  CodeGen(std::unique_ptr<llvm::Module> M)
+      : M(std::move(M)), OS(nullptr), TaskID(-1), CombinedIndex(nullptr) {
+    initTargetMachine();
+  }
+  /// Constructor used by ThinLTO.
+  CodeGen(std::unique_ptr<llvm::Module> M, raw_fd_ostream *OS, int TaskID,
+          const FunctionInfoIndex *CombinedIndex, std::string Filename)
+      : M(std::move(M)), OS(OS), TaskID(TaskID), CombinedIndex(CombinedIndex),
+        SaveTempsFilename(Filename) {
+    assert(options::thinlto == !!CombinedIndex &&
+           "Expected function index iff performing ThinLTO");
     initTargetMachine();
   }
 
   /// Invoke LTO passes and the code generator for the module.
   void runAll();
 
+  /// Invoke the actual code generation to emit Module's object to file.
+  void runCodegenPasses();
+
 private:
   /// Create a target machine for the module. Must be unique for each
   /// module/task.
@@ -854,6 +934,7 @@ void CodeGen::runLTOPasses() {
   PMB.LoopVectorize = true;
   PMB.SLPVectorize = true;
   PMB.OptLevel = options::OptLevel;
+  PMB.FunctionIndex = CombinedIndex;
   PMB.populateLTOPassManager(passes);
   passes.run(*M);
 }
@@ -883,6 +964,15 @@ static int openOutputFile(SmallString<12
   return FD;
 }
 
+void CodeGen::runCodegenPasses() {
+  assert(OS && "Output stream must be set before emitting to file");
+  legacy::PassManager CodeGenPasses;
+  if (TM->addPassesToEmitFile(CodeGenPasses, *OS,
+                              TargetMachine::CGFT_ObjectFile))
+    report_fatal_error("Failed to setup codegen");
+  CodeGenPasses.run(*M);
+}
+
 void CodeGen::runSplitCodeGen() {
   const std::string &TripleStr = M->getTargetTriple();
   Triple TheTriple(TripleStr);
@@ -931,10 +1021,22 @@ void CodeGen::runAll() {
   runLTOPasses();
 
   if (options::TheOutputType == options::OT_SAVE_TEMPS) {
-    saveBCFile(output_name + ".opt.bc", *M);
-  }
-
-  runSplitCodeGen();
+    std::string OptFilename = output_name;
+    // If the CodeGen client provided a filename, use it. Always expect
+    // a provided filename if we are in a task (i.e. ThinLTO backend).
+    assert(!SaveTempsFilename.empty() || TaskID == -1);
+    if (!SaveTempsFilename.empty())
+      OptFilename = SaveTempsFilename;
+    saveBCFile(OptFilename + ".opt.bc", *M);
+  }
+
+  // If we are already in a thread (i.e. ThinLTO), just perform
+  // codegen passes directly.
+  if (TaskID >= 0)
+    runCodegenPasses();
+  // Otherwise attempt split code gen.
+  else
+    runSplitCodeGen();
 }
 
 /// Links the module in \p View from file \p F into the combined module
@@ -959,6 +1061,88 @@ static bool linkInModule(LLVMContext &Co
   return false;
 }
 
+/// Perform the ThinLTO backend on a single module, invoking the LTO and codegen
+/// pipelines.
+static void thinLTOBackendTask(claimed_file &F, const void *View,
+                               ld_plugin_input_file &File,
+                               raw_fd_ostream *ApiFile,
+                               const FunctionInfoIndex &CombinedIndex,
+                               raw_fd_ostream *OS, unsigned TaskID) {
+  // Need to use a separate context for each task
+  LLVMContext Context;
+  Context.setDiagnosticHandler(diagnosticHandlerForContext, nullptr, true);
+
+  std::unique_ptr<llvm::Module> NewModule(new llvm::Module(File.name, Context));
+  IRMover L(*NewModule.get());
+
+  StringSet<> Dummy;
+  if (linkInModule(Context, L, F, View, File, ApiFile, Dummy, Dummy))
+    message(LDPL_FATAL, "Failed to rename module for ThinLTO");
+  if (renameModuleForThinLTO(*NewModule, &CombinedIndex))
+    message(LDPL_FATAL, "Failed to rename module for ThinLTO");
+
+  CodeGen codeGen(std::move(NewModule), OS, TaskID, &CombinedIndex, File.name);
+  codeGen.runAll();
+}
+
+/// Launch each module's backend pipeline in a separate task in a thread pool.
+static void thinLTOBackends(raw_fd_ostream *ApiFile,
+                            const FunctionInfoIndex &CombinedIndex) {
+  unsigned TaskCount = 0;
+  std::vector<ThinLTOTaskInfo> Tasks;
+  Tasks.reserve(Modules.size());
+  unsigned int MaxThreads = options::Parallelism
+                                ? options::Parallelism
+                                : thread::hardware_concurrency();
+
+  // Create ThreadPool in nested scope so that threads will be joined
+  // on destruction.
+  {
+    ThreadPool ThinLTOThreadPool(MaxThreads);
+    for (claimed_file &F : Modules) {
+      // Do all the gold callbacks in the main thread, since gold is not thread
+      // safe by default.
+      PluginInputFile InputFile(F.handle);
+      const void *View = getSymbolsAndView(F);
+
+      SmallString<128> Filename;
+      if (!options::obj_path.empty())
+        // Note that openOutputFile will append a unique ID for each task
+        Filename = options::obj_path;
+      else if (options::TheOutputType == options::OT_SAVE_TEMPS) {
+        // Use the input file name so that we get a unique and identifiable
+        // output file for each ThinLTO backend task.
+        Filename = InputFile.file().name;
+        Filename += ".thinlto.o";
+      }
+      bool TempOutFile = Filename.empty();
+
+      SmallString<128> NewFilename;
+      int FD = openOutputFile(Filename, TempOutFile, NewFilename,
+                              // Only append the TaskID if we will use the
+                              // non-unique obj_path.
+                              !options::obj_path.empty() ? TaskCount : -1);
+      TaskCount++;
+      std::unique_ptr<raw_fd_ostream> OS =
+          llvm::make_unique<raw_fd_ostream>(FD, true);
+
+      // Enqueue the task
+      ThinLTOThreadPool.async(thinLTOBackendTask, std::ref(F), View,
+                              std::ref(InputFile.file()), ApiFile,
+                              std::ref(CombinedIndex), OS.get(), TaskCount);
+
+      // Record the information needed by the task or during its cleanup
+      // to a ThinLTOTaskInfo instance. For information needed by the task
+      // the unique_ptr ownership is transferred to the ThinLTOTaskInfo.
+      Tasks.emplace_back(std::move(InputFile), std::move(OS),
+                         NewFilename.c_str(), TempOutFile);
+    }
+  }
+
+  for (auto &Task : Tasks)
+    Task.cleanup();
+}
+
 /// gold informs us that all symbols have been read. At this point, we use
 /// get_symbols to see if any of our definitions have been overridden by a
 /// native object file. Then, perform optimization and codegen.
@@ -995,8 +1179,13 @@ static ld_plugin_status allSymbolsReadHo
     WriteFunctionSummaryToFile(CombinedIndex, OS);
     OS.close();
 
-    cleanup_hook();
-    exit(0);
+    if (options::thinlto_index_only) {
+      cleanup_hook();
+      exit(0);
+    }
+
+    thinLTOBackends(ApiFile, CombinedIndex);
+    return LDPS_OK;
   }
 
   LLVMContext Context;




More information about the llvm-commits mailing list