[llvm] 1ea0865 - [Clang] Add env var for nvptx-arch/amdgpu-arch timeout (#102521)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 9 10:39:33 PDT 2024
Author: Joel E. Denny
Date: 2024-08-09T13:39:29-04:00
New Revision: 1ea0865dd6faf99ae25f22161c2aeb65ff9505c0
URL: https://github.com/llvm/llvm-project/commit/1ea0865dd6faf99ae25f22161c2aeb65ff9505c0
DIFF: https://github.com/llvm/llvm-project/commit/1ea0865dd6faf99ae25f22161c2aeb65ff9505c0.diff
LOG: [Clang] Add env var for nvptx-arch/amdgpu-arch timeout (#102521)
When working on very busy systems, check-offload frequently fails many
tests with this diagnostic:
```
clang: error: cannot determine amdgcn architecture: /tmp/llvm/build/bin/amdgpu-arch: Child timed out: ; consider passing it via '-march'
```
This patch accepts the environment variable
`CLANG_TOOLCHAIN_PROGRAM_TIMEOUT` to set the timeout. It also increases
the timeout from 10 to 60 seconds.
Added:
Modified:
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/ToolChain.h
clang/lib/Driver/ToolChain.cpp
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Driver/amdgpu-hip-system-arch.c
clang/test/Driver/nvptx-cuda-system-arch.c
clang/test/Driver/openmp-system-arch.c
llvm/utils/lit/lit/TestingConfig.py
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 3d8240f8357b40..92a602829933ce 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -98,8 +98,10 @@ def warn_drv_amdgpu_cov6: Warning<
"code object v6 is still in development and not ready for production use yet;"
" use at your own risk">;
def err_drv_undetermined_gpu_arch : Error<
- "cannot determine %0 architecture: %1; consider passing it via "
- "'%2'">;
+ "cannot determine %0 architecture: %1; consider passing it via '%2'; "
+ "environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool "
+ "timeout (integer secs, <=0 is infinite)">;
+
def warn_drv_multi_gpu_arch : Warning<
"multiple %0 architectures are detected: %1; only the first one is used for "
"'%2'">, InGroup<MultiGPU>;
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index ece1384d5d3c02..5347e29be91439 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -205,8 +205,7 @@ class ToolChain {
/// Executes the given \p Executable and returns the stdout.
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
- executeToolChainProgram(StringRef Executable,
- unsigned SecondsToWait = 0) const;
+ executeToolChainProgram(StringRef Executable) const;
void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 2d50c2cbbc881c..01370999915521 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
@@ -104,8 +105,7 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
}
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-ToolChain::executeToolChainProgram(StringRef Executable,
- unsigned SecondsToWait) const {
+ToolChain::executeToolChainProgram(StringRef Executable) const {
llvm::SmallString<64> OutputFile;
llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile);
llvm::FileRemover OutputRemover(OutputFile.c_str());
@@ -116,6 +116,16 @@ ToolChain::executeToolChainProgram(StringRef Executable,
};
std::string ErrorMessage;
+ int SecondsToWait = 60;
+ if (std::optional<std::string> Str =
+ llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) {
+ if (!llvm::to_integer(*Str, SecondsToWait))
+ return llvm::createStringError(std::error_code(),
+ "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected "
+ "an integer, got '" +
+ *Str + "'");
+ SecondsToWait = std::min(SecondsToWait, 0); // infinite
+ }
if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait,
/*MemoryLimit=*/0, &ErrorMessage))
return llvm::createStringError(std::error_code(),
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index aa8f9197cfabc3..a788aba57546c8 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -899,7 +899,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("amdgpu-arch");
- auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
+ auto StdoutOrErr = executeToolChainProgram(Program);
if (!StdoutOrErr)
return StdoutOrErr.takeError();
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 17c952c808f725..67a427b9d5ceee 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -804,7 +804,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("nvptx-arch");
- auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
+ auto StdoutOrErr = executeToolChainProgram(Program);
if (!StdoutOrErr)
return StdoutOrErr.takeError();
diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c
index a46077b38fac08..f25a4087080f6d 100644
--- a/clang/test/Driver/amdgpu-hip-system-arch.c
+++ b/clang/test/Driver/amdgpu-hip-system-arch.c
@@ -29,3 +29,11 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
+
+// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed.
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \
+// RUN: -x hip %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
+// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c
index e6a2fa40f0a038..6a8a218406d139 100644
--- a/clang/test/Driver/nvptx-cuda-system-arch.c
+++ b/clang/test/Driver/nvptx-cuda-system-arch.c
@@ -42,3 +42,11 @@
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89
// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu]
// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89"
+
+// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed.
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib \
+// RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
+// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
+// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c
index ea6ec6428592b3..cd49f460099666 100644
--- a/clang/test/Driver/openmp-system-arch.c
+++ b/clang/test/Driver/openmp-system-arch.c
@@ -75,3 +75,19 @@
// RUN: -fopenmp-targets=amdgcn-amd-amdhsa --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=AMDGPU
// AMDGPU: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'
+
+// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for nvptx-arch.
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT=foo \
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib \
+// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-NVPTX
+// BAD-TIMEOUT-NVPTX: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
+
+// case when CLANG_TOOLCHAIN_PROGRAM_TIMEOUT is malformed for amdgpu-arch.
+// RUN: env CLANG_TOOLCHAIN_PROGRAM_TIMEOUT= \
+// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN: -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib \
+// RUN: --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 | \
+// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT-AMDGPU
+// BAD-TIMEOUT-AMDGPU: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got ''; consider passing it via '-march'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py
index eb9f8de2a7f960..9d8e93460933d8 100644
--- a/llvm/utils/lit/lit/TestingConfig.py
+++ b/llvm/utils/lit/lit/TestingConfig.py
@@ -26,6 +26,7 @@ def fromdefaults(litConfig):
"SYSTEMROOT",
"TERM",
"CLANG",
+ "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT",
"LLDB",
"LD_PRELOAD",
"LLVM_SYMBOLIZER_PATH",
More information about the llvm-commits
mailing list