[clang] [Clang] Add timeout for GPU detection utilities (PR #94751)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 7 06:23:33 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/94751

>From 0e367c72a1cc163fd781f98b9fac809d90f4beb7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 7 Jun 2024 08:15:06 -0500
Subject: [PATCH] [Clang] Add timeout for GPU detection utilities

Summary:
The utilities `nvptx-arch` and `amdgpu-arch` are used to support
`--offload-arch=native` among other utilities in clang. However, these
rely on the GPU drivers to query the features. In certain cases these
drivers can become locked up, which will lead to indefinite hangs on any
compiler jobs running in the meantime.

This patch adds a ten-second timeout for these utilities; if a utility
does not finish in time, it is killed and an error is reported.
---
 clang/include/clang/Driver/ToolChain.h | 3 ++-
 clang/lib/Driver/ToolChain.cpp         | 8 ++++----
 clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +-
 clang/lib/Driver/ToolChains/Cuda.cpp   | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index a4f9cad98aa8b..9789cfacafd78 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -205,7 +205,8 @@ class ToolChain {
 
   /// Executes the given \p Executable and returns the stdout.
   llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  executeToolChainProgram(StringRef Executable) const;
+  executeToolChainProgram(StringRef Executable,
+                          unsigned SecondsToWait = 0) const;
 
   void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
 
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 0e86bc07e0ea2..40ab2e91125d1 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -104,7 +104,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
 }
 
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-ToolChain::executeToolChainProgram(StringRef Executable) const {
+ToolChain::executeToolChainProgram(StringRef Executable,
+                                   unsigned SecondsToWait) const {
   llvm::SmallString<64> OutputFile;
   llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile);
   llvm::FileRemover OutputRemover(OutputFile.c_str());
@@ -115,9 +116,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const {
   };
 
   std::string ErrorMessage;
-  if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects,
-                                /* SecondsToWait */ 0,
-                                /*MemoryLimit*/ 0, &ErrorMessage))
+  if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait,
+                                /*MemoryLimit=*/0, &ErrorMessage))
     return llvm::createStringError(std::error_code(),
                                    Executable + ": " + ErrorMessage);
 
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 9ffea57b005de..11a98a0ec314d 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -877,7 +877,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
   else
     Program = GetProgramPath("amdgpu-arch");
 
-  auto StdoutOrErr = executeToolChainProgram(Program);
+  auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
   if (!StdoutOrErr)
     return StdoutOrErr.takeError();
 
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index bbc8be91fd70b..2dfc7457b0ac7 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -826,7 +826,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
   else
     Program = GetProgramPath("nvptx-arch");
 
-  auto StdoutOrErr = executeToolChainProgram(Program);
+  auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
   if (!StdoutOrErr)
     return StdoutOrErr.takeError();
 



More information about the cfe-commits mailing list