[clang] d5a5ee8 - [Clang] Add `nvptx-arch` tool to query installed NVIDIA GPUs

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Sun Dec 25 19:24:41 PST 2022


Author: Joseph Huber
Date: 2022-12-25T21:24:30-06:00
New Revision: d5a5ee856e7ccd442ad22168c56ff77197255cbe

URL: https://github.com/llvm/llvm-project/commit/d5a5ee856e7ccd442ad22168c56ff77197255cbe
DIFF: https://github.com/llvm/llvm-project/commit/d5a5ee856e7ccd442ad22168c56ff77197255cbe.diff

LOG: [Clang] Add `nvptx-arch` tool to query installed NVIDIA GPUs

We already have a tool called `amdgpu-arch` which returns the GPUs on
the system. This is used to determine the default architecture when
doing offloading. This patch introduces a similar tool `nvptx-arch`.
Right now we use the detected GPU at compile time. This is unhelpful
when building on a login node and moving execution to a compute node for
example. This will allow us to better choose a default architecture when
targeting NVPTX. Also we can probably use this with CMake's `native`
setting for CUDA now.

CUDA since 11.6 provides `__nvcc_device_query` which has a similar
function but it is probably better to define this locally if we want to
depend on it in clang.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D140433

Added: 
    clang/tools/nvptx-arch/CMakeLists.txt
    clang/tools/nvptx-arch/NVPTXArch.cpp

Modified: 
    clang/tools/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index 2ce7fd65a555b..f60db6ef0ba34 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -50,3 +50,4 @@ add_llvm_external_project(clang-tools-extra extra)
 add_clang_subdirectory(libclang)
 
 add_clang_subdirectory(amdgpu-arch)
+add_clang_subdirectory(nvptx-arch)

diff  --git a/clang/tools/nvptx-arch/CMakeLists.txt b/clang/tools/nvptx-arch/CMakeLists.txt
new file mode 100644
index 0000000000000..94ef206a3bd75
--- /dev/null
+++ b/clang/tools/nvptx-arch/CMakeLists.txt
@@ -0,0 +1,28 @@
# ===----------------------------------------------------------------------===//
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for details.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===//

# Locate the CUDA toolkit via the modern FindCUDAToolkit module (CMake >= 3.17,
# well below LLVM's minimum) instead of the deprecated FindCUDA module. The
# CUDA::cuda_driver imported target resolves the driver library, preferring the
# real libcuda and falling back to the toolkit's stubs directory.
find_package(CUDAToolkit QUIET)

# The tool is optional: skip it quietly when no CUDA driver API is available.
if (NOT CUDAToolkit_FOUND OR NOT TARGET CUDA::cuda_driver)
  message(STATUS "Not building nvptx-arch: cuda runtime not found")
  return()
endif()

add_clang_tool(nvptx-arch NVPTXArch.cpp)

# Keep the link-time library search path in the installed binary's RPATH so a
# stub-linked binary can still locate the real driver at run time.
set_target_properties(nvptx-arch PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON)
target_include_directories(nvptx-arch PRIVATE ${CUDAToolkit_INCLUDE_DIRS})

clang_target_link_libraries(nvptx-arch PRIVATE CUDA::cuda_driver)

diff  --git a/clang/tools/nvptx-arch/NVPTXArch.cpp b/clang/tools/nvptx-arch/NVPTXArch.cpp
new file mode 100644
index 0000000000000..f70acf9a9f5b3
--- /dev/null
+++ b/clang/tools/nvptx-arch/NVPTXArch.cpp
@@ -0,0 +1,72 @@
+//===- NVPTXArch.cpp - list installed NVPTX devices -----*- C++ -*---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a tool for detecting the names of the CUDA GPUs
+// installed in the system.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__has_include)
+#if __has_include("cuda.h")
+#include "cuda.h"
+#define CUDA_HEADER_FOUND 1
+#else
+#define CUDA_HEADER_FOUND 0
+#endif
+#else
+#define CUDA_HEADER_FOUND 0
+#endif
+
+#if !CUDA_HEADER_FOUND
+int main() { return 1; }
+#else
+
+#include <cstdint>
+#include <cstdio>
+
+static int handleError(CUresult Err) {
+  const char *ErrStr = nullptr;
+  CUresult Result = cuGetErrorString(Err, &ErrStr);
+  if (Result != CUDA_SUCCESS)
+    return EXIT_FAILURE;
+  fprintf(stderr, "CUDA error: %s\n", ErrStr);
+  return EXIT_FAILURE;
+}
+
+int main() {
+  if (CUresult Err = cuInit(0)) {
+    if (Err == CUDA_ERROR_NO_DEVICE)
+      return EXIT_SUCCESS;
+    else
+      return handleError(Err);
+  }
+
+  int Count = 0;
+  if (CUresult Err = cuDeviceGetCount(&Count))
+    return handleError(Err);
+  if (Count == 0)
+    return EXIT_SUCCESS;
+  for (int DeviceId = 0; DeviceId < Count; ++DeviceId) {
+    CUdevice Device;
+    if (CUresult Err = cuDeviceGet(&Device, DeviceId))
+      return handleError(Err);
+
+    int32_t Major, Minor;
+    if (CUresult Err = cuDeviceGetAttribute(
+            &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, Device))
+      return handleError(Err);
+    if (CUresult Err = cuDeviceGetAttribute(
+            &Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, Device))
+      return handleError(Err);
+
+    printf("sm_%d%d\n", Major, Minor);
+  }
+  return EXIT_SUCCESS;
+}
+
+#endif


        


More information about the cfe-commits mailing list