[clang] [NVPTX] Add support for -march=native in standalone NVPTX (PR #79373)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 25 05:51:09 PST 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79373
>From 145b7bc932ce3ffa46545cd7af29b1c93981429c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 24 Jan 2024 15:34:00 -0600
Subject: [PATCH 1/2] [NVPTX] Add support for -march=native in standalone NVPTX
Summary:
We support `--target=nvptx64-nvidia-cuda` as a way to target the NVPTX
architecture directly, much like a standard CPU target. This patch simply
reuses the existing support for handling `--offload-arch=native` so that
`-march=native` also works in the standalone toolchain.
---
clang/lib/Driver/ToolChains/Cuda.cpp | 61 +++++++++++++---------
clang/lib/Driver/ToolChains/Cuda.h | 10 ++--
clang/test/Driver/nvptx-cuda-system-arch.c | 5 ++
3 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 1462576ca870e6f..6215c43b5fc96bd 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -738,9 +738,18 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
if (!llvm::is_contained(*DAL, A))
DAL->append(A);
- if (!DAL->hasArg(options::OPT_march_EQ))
+ if (!DAL->hasArg(options::OPT_march_EQ)) {
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CudaArchToString(CudaArch::CudaDefault));
+ } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
+ auto GPUsOrErr = getSystemGPUArchs(Args);
+ if (!GPUsOrErr)
+ getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
+ else
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+ Args.MakeArgString(GPUsOrErr->front()));
+ }
return DAL;
}
@@ -783,6 +792,31 @@ void NVPTXToolChain::adjustDebugInfoKind(
}
}
+Expected<SmallVector<std::string>>
+NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
+ // Detect NVIDIA GPUs available on the system.
+ std::string Program;
+ if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
+ Program = A->getValue();
+ else
+ Program = GetProgramPath("nvptx-arch");
+
+ auto StdoutOrErr = executeToolChainProgram(Program);
+ if (!StdoutOrErr)
+ return StdoutOrErr.takeError();
+
+ SmallVector<std::string, 1> GPUArchs;
+ for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
+ if (!Arch.empty())
+ GPUArchs.push_back(Arch.str());
+
+ if (GPUArchs.empty())
+ return llvm::createStringError(std::error_code(),
+ "No NVIDIA GPU detected in the system");
+
+ return std::move(GPUArchs);
+}
+
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.
@@ -948,31 +982,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
return DAL;
}
-Expected<SmallVector<std::string>>
-CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
- // Detect NVIDIA GPUs availible on the system.
- std::string Program;
- if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
- Program = A->getValue();
- else
- Program = GetProgramPath("nvptx-arch");
-
- auto StdoutOrErr = executeToolChainProgram(Program);
- if (!StdoutOrErr)
- return StdoutOrErr.takeError();
-
- SmallVector<std::string, 1> GPUArchs;
- for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
- if (!Arch.empty())
- GPUArchs.push_back(Arch.str());
-
- if (GPUArchs.empty())
- return llvm::createStringError(std::error_code(),
- "No NVIDIA GPU detected in the system");
-
- return std::move(GPUArchs);
-}
-
Tool *NVPTXToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 8a053f3393e1206..43c17ba7c0ba03d 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -168,6 +168,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
unsigned GetDefaultDwarfVersion() const override { return 2; }
unsigned getMaxDwarfVersion() const override { return 2; }
+ /// Uses nvptx-arch tool to get arch of the system GPU. Will return error
+ /// if unable to find one.
+ virtual Expected<SmallVector<std::string>>
+ getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
+
CudaInstallationDetector CudaInstallation;
protected:
@@ -223,11 +228,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
const ToolChain &HostTC;
- /// Uses nvptx-arch tool to get arch of the system GPU. Will return error
- /// if unable to find one.
- virtual Expected<SmallVector<std::string>>
- getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;
-
protected:
Tool *buildAssembler() const override; // ptxas
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c
index 037215fd52a88b2..bd001f82052dc38 100644
--- a/clang/test/Driver/nvptx-cuda-system-arch.c
+++ b/clang/test/Driver/nvptx-cuda-system-arch.c
@@ -31,3 +31,8 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --offload-new-driver --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
+
+// case when nvptx-arch is used via '-march=native'
+// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
+// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70
+// MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
>From a73f15b41d54f0bc8e40a4ed3a4877248d2f75d3 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 25 Jan 2024 07:50:58 -0600
Subject: [PATCH 2/2] Add test
---
clang/lib/Driver/ToolChains/Cuda.cpp | 9 +++++++--
.../test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 | 4 ++++
clang/test/Driver/nvptx-cuda-system-arch.c | 8 ++++++++
3 files changed, 19 insertions(+), 2 deletions(-)
create mode 100644 clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 6215c43b5fc96bd..71e7b46dbd72d8f 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -743,12 +743,17 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
CudaArchToString(CudaArch::CudaDefault));
} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
auto GPUsOrErr = getSystemGPUArchs(Args);
- if (!GPUsOrErr)
+ if (!GPUsOrErr) {
getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
<< getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
- else
+ } else {
+ if (GPUsOrErr->size() > 1)
+ getDriver().Diag(diag::warn_drv_multi_gpu_arch)
+ << llvm::Triple::getArchTypeName(getArch())
+ << llvm::join(*GPUsOrErr, ", ") << "-march";
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
Args.MakeArgString(GPUsOrErr->front()));
+ }
}
return DAL;
diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80
new file mode 100644
index 000000000000000..c9df565b9f8d7af
--- /dev/null
+++ b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80
@@ -0,0 +1,4 @@
+#!/bin/sh
+echo sm_89
+echo sm_80
+exit 0
diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c
index bd001f82052dc38..81cad3715066927 100644
--- a/clang/test/Driver/nvptx-cuda-system-arch.c
+++ b/clang/test/Driver/nvptx-cuda-system-arch.c
@@ -6,9 +6,11 @@
// RUN: mkdir -p %t
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
+// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 %t/
// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
// RUN: chmod +x %t/nvptx_arch_fail
// RUN: chmod +x %t/nvptx_arch_sm_70
+// RUN: chmod +x %t/nvptx_arch_sm_89_sm_80
// RUN: chmod +x %t/nvptx_arch_empty
// case when nvptx-arch returns nothing or fails
@@ -36,3 +38,9 @@
// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70
// MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
+
+// case when nvptx-arch reports multiple GPUs via '-march=native'
+// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_89_sm_80 \
+// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89
+// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu]
+// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89"
More information about the cfe-commits
mailing list