[clang] 3762111 - [OpenMP] Link the bitcode library late for device LTO

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 31 20:12:01 PST 2022


Author: Joseph Huber
Date: 2022-01-31T23:11:41-05:00
New Revision: 3762111aa9608fce12c4f938bfef2b38aed766dd

URL: https://github.com/llvm/llvm-project/commit/3762111aa9608fce12c4f938bfef2b38aed766dd
DIFF: https://github.com/llvm/llvm-project/commit/3762111aa9608fce12c4f938bfef2b38aed766dd.diff

LOG: [OpenMP] Link the bitcode library late for device LTO

Summary:
This patch adds support for linking the OpenMP device bitcode library
late when doing LTO. This simply passes it in as an additional device
file when doing the final device linking phase with LTO. This has the
advantage that we don't link it multiple times, and the device
references do not get inlined early, which would prevent us from doing
needed OpenMP optimizations when we have visibility of the whole module.
Also fix some compilation failures where the implicit conversion of an
Error to an Expected triggered the deleted copy constructor.

Depends on D116675

Differential revision: https://reviews.llvm.org/D117048

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/Cuda.cpp
    clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 6899f9360da5d..d7cf41e4b6605 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -285,6 +285,10 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
 
+  // Link the bitcode library late if we're using device LTO.
+  if (getDriver().isUsingLTO(/* IsOffload */ true))
+    return;
+
   std::string BitcodeSuffix;
   if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
                          options::OPT_fno_openmp_target_new_runtime, true))

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 8d300a9618705..7cc47b74ca916 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8164,6 +8164,34 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
           "-target-feature=" + TC->getTripleString() + "=" + *(FeatureIt + 1)));
     }
 
+    // Pass in the bitcode library to be linked during LTO.
+    for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
+         ++TI) {
+      const ToolChain *TC = TI->second;
+      const Driver &D = TC->getDriver();
+      const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
+      StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ);
+
+      std::string BitcodeSuffix;
+      if (TCArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                         options::OPT_fno_openmp_target_new_runtime, true))
+        BitcodeSuffix += "new-";
+      if (TC->getTriple().isNVPTX())
+        BitcodeSuffix += "nvptx-";
+      else if (TC->getTriple().isAMDGPU())
+        BitcodeSuffix += "amdgpu-";
+      BitcodeSuffix += Arch;
+
+      ArgStringList BitcodeLibrary;
+      addOpenMPDeviceRTL(D, TCArgs, BitcodeLibrary, BitcodeSuffix,
+                         TC->getTriple());
+
+      if (!BitcodeLibrary.empty())
+        CmdArgs.push_back(
+            Args.MakeArgString("-target-library=" + TC->getTripleString() +
+                               "-" + Arch + "=" + BitcodeLibrary.back()));
+    }
+
     // Pass in the optimization level to use for LTO.
     if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
       StringRef OOpt;

diff  --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 7324339efaa62..4a9f6d4c4e3e4 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -744,6 +744,10 @@ void CudaToolChain::addClangTargetOptions(
       return;
     }
 
+    // Link the bitcode library late if we're using device LTO.
+    if (getDriver().isUsingLTO(/* IsOffload */ true))
+      return;
+
     std::string BitcodeSuffix;
     if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
                            options::OPT_fno_openmp_target_new_runtime, true))

diff  --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index f44bc46a62440..27f4bdf153c53 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -68,9 +68,14 @@ static cl::opt<std::string>
 
 static cl::opt<std::string> OptLevel("opt-level",
                                      cl::desc("Optimization level for LTO"),
-                                     cl::init("O0"),
+                                     cl::init("O2"),
                                      cl::cat(ClangLinkerWrapperCategory));
 
+static cl::opt<std::string>
+    BitcodeLibrary("target-library",
+                   cl::desc("Path for the target bitcode library"),
+                   cl::cat(ClangLinkerWrapperCategory));
+
 // Do not parse linker options.
 static cl::list<std::string>
     HostLinkerArgs(cl::Sink, cl::desc("<options to be passed to linker>..."));
@@ -197,7 +202,7 @@ extractFromBinary(const ObjectFile &Obj,
       std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
       std::copy(Contents->begin(), Contents->end(), Output->getBufferStart());
       if (Error E = Output->commit())
-        return E;
+        return std::move(E);
 
       DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile);
       ToBeStripped.push_back(*Name);
@@ -225,7 +230,7 @@ extractFromBinary(const ObjectFile &Obj,
     std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
     std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
     if (Error E = Output->commit())
-      return E;
+      return std::move(E);
     StripFile = TempFile;
   }
 
@@ -307,7 +312,7 @@ extractFromBitcode(std::unique_ptr<MemoryBuffer> Buffer,
     std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
     std::copy(Contents.begin(), Contents.end(), Output->getBufferStart());
     if (Error E = Output->commit())
-      return E;
+      return std::move(E);
 
     DeviceFiles.emplace_back(DeviceTriple, Arch, TempFile);
     ToBeDeleted.push_back(&GV);
@@ -318,7 +323,7 @@ extractFromBitcode(std::unique_ptr<MemoryBuffer> Buffer,
 
   // We need to materialize the lazy module before we make any changes.
   if (Error Err = M->materializeAll())
-    return Err;
+    return std::move(Err);
 
   // Remove the global from the module and write it to a new file.
   for (GlobalVariable *GV : ToBeDeleted) {
@@ -392,7 +397,7 @@ extractFromArchive(const Archive &Library,
   }
 
   if (Err)
-    return Err;
+    return std::move(Err);
 
   if (!NewMembers)
     return None;
@@ -406,9 +411,9 @@ extractFromArchive(const Archive &Library,
 
   std::unique_ptr<MemoryBuffer> Buffer =
       MemoryBuffer::getMemBuffer(Library.getMemoryBufferRef(), false);
-  if (Error WriteErr = writeArchive(TempFile, Members, true, Library.kind(),
+  if (Error Err = writeArchive(TempFile, Members, true, Library.kind(),
                                     true, Library.isThin(), std::move(Buffer)))
-    return WriteErr;
+    return std::move(Err);
 
   return static_cast<std::string>(TempFile);
 }
@@ -726,7 +731,7 @@ Expected<Optional<std::string>> linkBitcodeFiles(ArrayRef<StringRef> InputFiles,
 
     // Add the bitcode file with its resolved symbols to the LTO job.
     if (Error Err = LTOBackend->add(std::move(BitcodeFile), Resolutions))
-      return Err;
+      return std::move(Err);
   }
 
   // Run the LTO job to compile the bitcode.
@@ -744,7 +749,7 @@ Expected<Optional<std::string>> linkBitcodeFiles(ArrayRef<StringRef> InputFiles,
         std::make_unique<llvm::raw_fd_ostream>(FD, true));
   };
   if (Error Err = LTOBackend->run(AddStream))
-    return Err;
+    return std::move(Err);
 
   for (auto &File : Files) {
     if (!TheTriple.isNVPTX())
@@ -957,6 +962,17 @@ int main(int argc, const char **argv) {
     }
   }
 
+  // Add the device bitcode library to the device files if it was passed in.
+  if (!BitcodeLibrary.empty()) {
+    // FIXME: Hacky workaround to avoid a backend crash at O0.
+    if (OptLevel[1] - '0' == 0)
+      OptLevel[1] = '1';
+    auto DeviceAndPath = StringRef(BitcodeLibrary).split('=');
+    auto TripleAndArch = DeviceAndPath.first.rsplit('-');
+    DeviceFiles.emplace_back(TripleAndArch.first, TripleAndArch.second,
+                             DeviceAndPath.second);
+  }
+
   // Link the device images extracted from the linker input.
   SmallVector<std::string, 16> LinkedImages;
   if (Error Err = linkDeviceFiles(DeviceFiles, LinkerArgs, LinkedImages))


        


More information about the cfe-commits mailing list