[clang] 6e68892 - [Offloading] Embed the target features in the OffloadBinary

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 23 10:15:14 PDT 2022


Author: Joseph Huber
Date: 2022-06-23T13:15:01-04:00
New Revision: 6e6889288cdc8433f33723d977c99be5f07423f4

URL: https://github.com/llvm/llvm-project/commit/6e6889288cdc8433f33723d977c99be5f07423f4
DIFF: https://github.com/llvm/llvm-project/commit/6e6889288cdc8433f33723d977c99be5f07423f4.diff

LOG: [Offloading] Embed the target features in the OffloadBinary

The target features are necessary for correctly compiling most programs
in LTO mode. Currently, these are derived in clang at link time and
passed as an arguemnt to the linker wrapper. This is problematic because
it requires knowing the required toolchain at link time, which should
not be necessry. Instead, these features should be embedded into the
offloading binary so we can unify them in the linker wrapper for LTO.
This also required changing the offload packager to interpret multiple
arguments as concatenation with a comma. This is so we can still use the
`,` separator for the argument list.

Depends on D127246

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D127686

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/test/Driver/openmp-offload-gpu-new.c
    clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
    clang/tools/clang-offload-packager/ClangOffloadPackager.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 46708a6e782f9..c6a8e34204977 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8303,11 +8303,27 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA,
     StringRef Arch = (OffloadAction->getOffloadingArch())
                          ? OffloadAction->getOffloadingArch()
                          : TCArgs.getLastArgValue(options::OPT_march_EQ);
+    StringRef Kind =
+      Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind());
+
+    ArgStringList Features;
+    SmallVector<StringRef> FeatureArgs;
+    getTargetFeatures(TC->getDriver(), TC->getTriple(), Args, Features, false);
+    llvm::copy_if(Features, std::back_inserter(FeatureArgs),
+                  [](StringRef Arg) { return !Arg.startswith("-target"); });
+
+    SmallVector<std::string> Parts{
+        "file=" + File.str(),
+        "triple=" + TC->getTripleString(),
+        "arch=" + Arch.str(),
+        "kind=" + Kind.str(),
+    };
 
-    CmdArgs.push_back(Args.MakeArgString(
-        "--image=file=" + File + "," + "triple=" + TC->getTripleString() + "," +
-        "arch=" + Arch + "," + "kind=" +
-        Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind())));
+    if (TC->getDriver().isUsingLTO(/* IsOffload */ true))
+      for (StringRef Feature : FeatureArgs)
+        Parts.emplace_back("feature=" + Feature.str());
+
+    CmdArgs.push_back(Args.MakeArgString("--image=" + llvm::join(Parts, ",")));
   }
 
   C.addCommand(std::make_unique<Command>(
@@ -8365,20 +8381,6 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   if (D.isUsingLTO(/* IsOffload */ true)) {
-    // Pass in target features for each toolchain.
-    for (auto &I :
-         llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-      const ToolChain *TC = I.second;
-      const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
-      ArgStringList FeatureArgs;
-      TC->addClangTargetOptions(TCArgs, FeatureArgs, Action::OFK_OpenMP);
-      auto FeatureIt = llvm::find(FeatureArgs, "-target-feature");
-      if (FeatureIt != FeatureArgs.end())
-        CmdArgs.push_back(
-            Args.MakeArgString("-target-feature=" + TC->getTripleString() +
-                               "=" + *(FeatureIt + 1)));
-    }
-
     // Pass in the optimization level to use for LTO.
     if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
       StringRef OOpt;

diff  --git a/clang/test/Driver/openmp-offload-gpu-new.c b/clang/test/Driver/openmp-offload-gpu-new.c
index 4c2748515cf0d..a59952a90e29e 100644
--- a/clang/test/Driver/openmp-offload-gpu-new.c
+++ b/clang/test/Driver/openmp-offload-gpu-new.c
@@ -115,3 +115,8 @@
 // RUN:     %s 2>&1 | FileCheck --check-prefix=CHECK-XLINKER %s
 
 // CHECK-XLINKER: -device-linker=a{{.*}}-device-linker=nvptx64-nvidia-cuda=b{{.*}}-device-linker=nvptx64-nvidia-cuda=c{{.*}}--
+
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=sm_52 -nogpulib \
+// RUN:     -foffload-lto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-FEATURES %s
+
+// CHECK-LTO-FEATURES: clang-offload-packager{{.*}}--image={{.*}}feature=+ptx{{[0-9]+}}

diff  --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index bb5a1963c6bad..eaac0b1099be6 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -68,10 +68,6 @@ static cl::opt<std::string> LinkerUserPath("linker-path", cl::Required,
                                            cl::desc("Path of linker binary"),
                                            cl::cat(ClangLinkerWrapperCategory));
 
-static cl::opt<std::string>
-    TargetFeatures("target-feature", cl::desc("Target features for triple"),
-                   cl::cat(ClangLinkerWrapperCategory));
-
 static cl::opt<std::string> OptLevel("opt-level",
                                      cl::desc("Optimization level for LTO"),
                                      cl::init("O2"),
@@ -726,16 +722,24 @@ void diagnosticHandler(const DiagnosticInfo &DI) {
   }
 }
 
-// Get the target features passed in from the driver as <triple>=<features>.
-std::vector<std::string> getTargetFeatures(const Triple &TheTriple) {
-  std::vector<std::string> Features;
-  auto TargetAndFeatures = StringRef(TargetFeatures).split('=');
-  if (TargetAndFeatures.first != TheTriple.getTriple())
-    return Features;
+// Get the list of target features from the input file and unify them such that
+// if there are multiple +xxx or -xxx features we only keep the last one.
+std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
+  SmallVector<StringRef> Features;
+  for (const OffloadFile &File : InputFiles) {
+    for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ","))
+      Features.emplace_back(Arg);
+  }
+
+  // Only add a feature if it hasn't been seen before starting from the end.
+  std::vector<std::string> UnifiedFeatures;
+  DenseSet<StringRef> UsedFeatures;
+  for (StringRef Feature : llvm::reverse(Features)) {
+    if (UsedFeatures.insert(Feature.drop_front()).second)
+      UnifiedFeatures.push_back(Feature.str());
+  }
 
-  for (auto Feature : llvm::split(TargetAndFeatures.second, ','))
-    Features.push_back(Feature.str());
-  return Features;
+  return UnifiedFeatures;
 }
 
 CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
@@ -755,6 +759,7 @@ CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
 template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
 std::unique_ptr<lto::LTO> createLTO(
     const Triple &TheTriple, StringRef Arch, bool WholeProgram,
+    const std::vector<std::string> &Features,
     ModuleHook Hook = [](size_t, const Module &) { return true; }) {
   lto::Config Conf;
   lto::ThinBackend Backend;
@@ -765,7 +770,7 @@ std::unique_ptr<lto::LTO> createLTO(
   Conf.CPU = Arch.str();
   Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
 
-  Conf.MAttrs = getTargetFeatures(TheTriple);
+  Conf.MAttrs = Features;
   Conf.CGOptLevel = getCGOptLevel(OptLevel[1] - '0');
   Conf.OptLevel = OptLevel[1] - '0';
   if (Conf.OptLevel > 0)
@@ -902,10 +907,12 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
   };
 
   // We assume visibility of the whole program if every input file was bitcode.
+  auto Features = getTargetFeatures(BitcodeInputFiles);
   bool WholeProgram = InputFiles.empty();
   auto LTOBackend =
-      (EmbedBitcode) ? createLTO(TheTriple, Arch, WholeProgram, OutputBitcode)
-                     : createLTO(TheTriple, Arch, WholeProgram);
+      (EmbedBitcode)
+          ? createLTO(TheTriple, Arch, WholeProgram, Features, OutputBitcode)
+          : createLTO(TheTriple, Arch, WholeProgram, Features);
 
   // We need to resolve the symbols so the LTO backend knows which symbols need
   // to be kept or can be internalized. This is a simplified symbol resolution

diff  --git a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
index 7ef258396e989..338b63ad0a223 100644
--- a/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
+++ b/clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/WithColor.h"
 
 using namespace llvm;
@@ -71,9 +72,18 @@ int main(int argc, const char **argv) {
   SmallVector<char, 1024> BinaryData;
   raw_svector_ostream OS(BinaryData);
   for (StringRef Image : DeviceImages) {
+    BumpPtrAllocator Alloc;
+    StringSaver Saver(Alloc);
+
     StringMap<StringRef> Args;
-    for (StringRef Arg : llvm::split(Image, ","))
-      Args.insert(Arg.split("="));
+    for (StringRef Arg : llvm::split(Image, ",")) {
+      auto KeyAndValue = Arg.split("=");
+      if (Args.count(KeyAndValue.first))
+        Args[KeyAndValue.first] =
+            Saver.save(Args[KeyAndValue.first] + "," + KeyAndValue.second);
+      else
+        Args[KeyAndValue.first] = KeyAndValue.second;
+    }
 
     if (!Args.count("triple") || !Args.count("file"))
       return reportError(createStringError(


        


More information about the cfe-commits mailing list