[clang] [Clang] Suppress missing architecture error when doing LTO (PR #100652)

Thu Jul 25 13:57:48 PDT 2024

https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/100652

Summary:
The `nvlink-wrapper` can do LTO now, which means we can still create
some LLVM-IR without needing an architecture. In the case that we try to
invoke `nvlink` internally, that will still fail. This patch simply
defers the error until later so we can use `--lto-emit-llvm` to get the
IR without specifying an architecture.


>From ae7f5c5388602c4233ff4f1c05c32c9b156a89eb Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 25 Jul 2024 15:40:40 -0500
Subject: [PATCH] [Clang] Suppress missing architecture error when doing LTO

Summary:
The `nvlink-wrapper` can do LTO now, which means we can still create
some LLVM-IR without needing an architecture. In the case that we try to
invoke `nvlink` internally, that will still fail. This patch simply
defers the error until later so we can use `--lto-emit-llvm` to get the
IR without specifying an architecture.
---
 clang/lib/Driver/ToolChains/Cuda.cpp                    | 8 +++++---
 clang/test/Driver/cuda-cross-compiling.c                | 7 +++++++
 clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp | 7 +++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index e98e574d6cc2b..6e10e3d006767 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -596,14 +596,16 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-v");
 
   StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  if (GPUArch.empty()) {
+  if (GPUArch.empty() && !C.getDriver().isUsingLTO()) {
     C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
         << getToolChain().getArchName() << getShortName();
     return;
   }
 
-  CmdArgs.push_back("-arch");
-  CmdArgs.push_back(Args.MakeArgString(GPUArch));
+  if (!GPUArch.empty()) {
+    CmdArgs.push_back("-arch");
+    CmdArgs.push_back(Args.MakeArgString(GPUArch));
+  }
 
   if (Args.hasArg(options::OPT_ptxas_path_EQ))
     CmdArgs.push_back(Args.MakeArgString(
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
index c2e538c25329e..5f24e7a5accb0 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -84,6 +84,13 @@
 // MISSING: error: must pass in an explicit nvptx64 gpu architecture to 'ptxas'
 // MISSING: error: must pass in an explicit nvptx64 gpu architecture to 'nvlink'
 
+// Do not error when performing LTO.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -flto %s -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=MISSING-LTO %s
+
+// MISSING-LTO-NOT: error: must pass in an explicit nvptx64 gpu architecture to 'nvlink'
+
 // RUN: %clang -target nvptx64-nvidia-cuda -flto -c %s -### 2>&1 \
 // RUN:   | FileCheck -check-prefix=GENERIC %s
 // RUN: %clang -target nvptx64-nvidia-cuda -march=sm_52 -march=generic -flto -c %s -### 2>&1 \
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
index 3885166e76ca7..ac60c96722c65 100644
--- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
+++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
@@ -302,6 +302,9 @@ Expected<StringRef> runPTXAs(StringRef File, const ArgList &Args) {
       findProgram(Args, "ptxas", {CudaPath + "/bin", GivenPath});
   if (!PTXAsPath)
     return PTXAsPath.takeError();
+  if (!Args.hasArg(OPT_arch))
+    return createStringError(
+        "must pass in an explicit nvptx64 gpu architecture to 'ptxas'");
 
   auto TempFileOrErr = createTempFile(
       Args, sys::path::stem(Args.getLastArgValue(OPT_o, "a.out")), "cubin");
@@ -693,6 +696,10 @@ Error runNVLink(ArrayRef<StringRef> Files, const ArgList &Args) {
   if (!NVLinkPath)
     return NVLinkPath.takeError();
 
+  if (!Args.hasArg(OPT_arch))
+    return createStringError(
+        "must pass in an explicit nvptx64 gpu architecture to 'nvlink'");
+
   ArgStringList NewLinkerArgs;
   for (const opt::Arg *Arg : Args) {
     // Do not forward arguments only intended for the linker wrapper.