[clang] [clang][tools] Add LevelZero support to offload-arch (PR #160570)
Alex Duran via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 24 13:52:07 PDT 2025
https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/160570
>From c15a1f18dad2a4ea880bf785f942d9a84be8364f Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 24 Sep 2025 19:16:41 +0200
Subject: [PATCH 1/4] [clang][tools] Add LevelZero support to offload-arch
---
clang/tools/offload-arch/CMakeLists.txt | 10 +-
clang/tools/offload-arch/LevelZeroArch.cpp | 133 +++++++++++++++++++++
clang/tools/offload-arch/OffloadArch.cpp | 19 ++-
3 files changed, 157 insertions(+), 5 deletions(-)
create mode 100644 clang/tools/offload-arch/LevelZeroArch.cpp
diff --git a/clang/tools/offload-arch/CMakeLists.txt b/clang/tools/offload-arch/CMakeLists.txt
index cb50b9c1d6dde..187c4d9517551 100644
--- a/clang/tools/offload-arch/CMakeLists.txt
+++ b/clang/tools/offload-arch/CMakeLists.txt
@@ -1,8 +1,16 @@
set(LLVM_LINK_COMPONENTS Support)
-add_clang_tool(offload-arch OffloadArch.cpp NVPTXArch.cpp AMDGPUArchByKFD.cpp AMDGPUArchByHIP.cpp)
+add_clang_tool(offload-arch OffloadArch.cpp NVPTXArch.cpp AMDGPUArchByKFD.cpp
+ AMDGPUArchByHIP.cpp LevelZeroArch.cpp)
+
+find_path(OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR NAMES level_zero/ze_api.h)
+if (OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR)
+ target_include_directories(offload-arch PRIVATE ${OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR})
+ target_compile_definitions(offload-arch PRIVATE HAVE_LEVEL_ZERO_HEADERS)
+endif()
add_clang_symlink(amdgpu-arch offload-arch)
add_clang_symlink(nvptx-arch offload-arch)
+add_clang_symlink(intelgpu-arch offload-arch)
target_link_libraries(offload-arch PRIVATE clangBasic)
diff --git a/clang/tools/offload-arch/LevelZeroArch.cpp b/clang/tools/offload-arch/LevelZeroArch.cpp
new file mode 100644
index 0000000000000..8b6b393f6f9f6
--- /dev/null
+++ b/clang/tools/offload-arch/LevelZeroArch.cpp
@@ -0,0 +1,133 @@
+//===- LevelZeroArch.cpp - list installed Level Zero devices ---*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a tool for detecting Level Zero devices installed in the
+// system
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HAVE_LEVEL_ZERO_HEADERS
+
+// Fallback compiled when HAVE_LEVEL_ZERO_HEADERS is not defined: no Level Zero
+// query is attempted, nothing is printed, and 0 is returned.
+int printGPUsByLevelZero() {
+ return 0;
+}
+
+#else
+
+#include "clang/Basic/Version.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <level_zero/ze_api.h>
+
+using namespace llvm;
+extern cl::opt<bool> Verbose;
+
+// DEFINE_WRAPPER(NAME) generates, for the Level Zero entry point NAME:
+//  * NAME_ty   - the function type of NAME, taken from its declaration;
+//  * NAMEPtr   - storage for the address resolved at run time (null until it
+//                is filled in by loadLevelZero());
+//  * NAME_wrap - a forwarding helper that returns ZE_RESULT_ERROR_UNKNOWN when
+//                the symbol has not been resolved and otherwise calls through
+//                the stored pointer.
+// The generated entities are helpers of this file only, so they are given
+// internal linkage. No trailing ';' follows the function body: that would be
+// an empty declaration, which pedantic builds warn about.
+#define DEFINE_WRAPPER(NAME) \
+  using NAME##_ty = decltype(NAME); \
+  static void *NAME##Ptr = nullptr; \
+  template <class... Ts> static ze_result_t NAME##_wrap(Ts... Args) { \
+    if (!NAME##Ptr) \
+      return ZE_RESULT_ERROR_UNKNOWN; \
+    return reinterpret_cast<NAME##_ty *>(NAME##Ptr)(Args...); \
+  }
+
+DEFINE_WRAPPER(zeInitDrivers)
+DEFINE_WRAPPER(zeDeviceGet)
+DEFINE_WRAPPER(zeDeviceGetProperties)
+
+// Loads the Level Zero loader library and resolves every entry point declared
+// with DEFINE_WRAPPER. Returns true when the library and all symbols were
+// found; otherwise returns false and, if Verbose is set, reports the reason on
+// stderr.
+static bool loadLevelZero() {
+  const char *L0Library = "libze_loader.so";
+  std::string ErrMsg;
+
+  // DynamicLibrary is a copyable handle, so it is held by value; no heap
+  // allocation is needed to keep it around for the lookups below.
+  llvm::sys::DynamicLibrary DynlibHandle =
+      llvm::sys::DynamicLibrary::getPermanentLibrary(L0Library, &ErrMsg);
+  if (!DynlibHandle.isValid()) {
+    if (ErrMsg.empty())
+      ErrMsg = "unknown error";
+    if (Verbose)
+      llvm::errs() << "Unable to load library '" << L0Library << "': " << ErrMsg
+                   << "!\n";
+    return false;
+  }
+
+  // Symbol name -> address of the pointer slot that receives its address.
+  constexpr struct {
+    const char *Name;
+    void **FnPtr;
+  } Symbols[] = {
+      {"zeInitDrivers", &zeInitDriversPtr},
+      {"zeDeviceGet", &zeDeviceGetPtr},
+      {"zeDeviceGetProperties", &zeDeviceGetPropertiesPtr},
+  };
+
+  for (const auto &Entry : Symbols) {
+    void *P = DynlibHandle.getAddressOfSymbol(Entry.Name);
+    if (P == nullptr) {
+      // A single missing symbol makes the whole backend unusable.
+      if (Verbose)
+        llvm::errs() << "Unable to find '" << Entry.Name << "' in '" << L0Library
+                     << "'!\n";
+      return false;
+    }
+    *Entry.FnPtr = P;
+  }
+
+  return true;
+}
+
+// Calls the wrapper Fn##_wrap with the given arguments. When the result is not
+// ZE_RESULT_SUCCESS, the *enclosing function* returns 1; if Verbose is set the
+// failing function name and result code are first written to stderr.
+// Because of the embedded `return 1;`, use this only inside functions whose
+// return type accepts that statement.
+#define CALL_ZE_AND_CHECK(Fn, ...) \
+ do { \
+ ze_result_t Rc = Fn##_wrap(__VA_ARGS__); \
+ if (Rc != ZE_RESULT_SUCCESS) { \
+ if (Verbose) \
+ llvm::errs() << "Error: " << __func__ << ":" << #Fn \
+ << " failed with error code " << Rc << "\n"; \
+ return 1; \
+ } \
+ } while (0)
+
+// Prints the name of every GPU device reported by the Level Zero drivers, one
+// name per line on stdout.
+// Returns 0 on success, or 1 if the loader library could not be loaded or any
+// Level Zero call failed (details go to stderr when Verbose is set).
+int printGPUsByLevelZero() {
+  if (!loadLevelZero())
+    return 1;
+
+  ze_init_driver_type_desc_t driver_type = {};
+  driver_type.stype = ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC;
+  driver_type.flags = ZE_INIT_DRIVER_TYPE_FLAG_GPU;
+  driver_type.pNext = nullptr;
+  uint32_t driverCount{0};
+
+  // Initialize and find all drivers: the first call only queries the count,
+  // the second one fills in the handles.
+  CALL_ZE_AND_CHECK(zeInitDrivers, &driverCount, nullptr, &driver_type);
+
+  llvm::SmallVector<ze_driver_handle_t> drivers(driverCount);
+  CALL_ZE_AND_CHECK(zeInitDrivers, &driverCount, drivers.data(), &driver_type);
+
+  for (auto driver : drivers) {
+    // Discover all the devices for a given driver (same count/fetch pattern).
+    uint32_t deviceCount = 0;
+    CALL_ZE_AND_CHECK(zeDeviceGet, driver, &deviceCount, nullptr);
+
+    llvm::SmallVector<ze_device_handle_t> devices(deviceCount);
+    CALL_ZE_AND_CHECK(zeDeviceGet, driver, &deviceCount, devices.data());
+
+    for (auto device : devices) {
+      // The descriptor is an in/out structure: Level Zero expects stype to
+      // identify it and pNext to be null or point to a valid extension
+      // structure, so it must not be passed uninitialized.
+      ze_device_properties_t deviceProperties = {};
+      deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
+      deviceProperties.pNext = nullptr;
+      CALL_ZE_AND_CHECK(zeDeviceGetProperties, device, &deviceProperties);
+      // Print device name
+      llvm::outs() << deviceProperties.name << '\n';
+    }
+  }
+
+  return 0;
+}
+
+#endif // HAVE_LEVEL_ZERO_HEADERS
diff --git a/clang/tools/offload-arch/OffloadArch.cpp b/clang/tools/offload-arch/OffloadArch.cpp
index 74be40214a0ec..5043c00986a9b 100644
--- a/clang/tools/offload-arch/OffloadArch.cpp
+++ b/clang/tools/offload-arch/OffloadArch.cpp
@@ -21,6 +21,7 @@ enum VendorName {
all,
amdgpu,
nvptx,
+ intel,
};
static cl::opt<VendorName>
@@ -28,7 +29,8 @@ static cl::opt<VendorName>
cl::init(all),
cl::values(clEnumVal(all, "Print all GPUs (default)"),
clEnumVal(amdgpu, "Only print AMD GPUs"),
- clEnumVal(nvptx, "Only print NVIDIA GPUs")));
+ clEnumVal(nvptx, "Only print NVIDIA GPUs"),
+ clEnumVal(intel, "Only print Intel GPUs")));
cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"),
cl::init(false), cl::cat(OffloadArchCategory));
@@ -40,6 +42,7 @@ static void PrintVersion(raw_ostream &OS) {
int printGPUsByKFD();
int printGPUsByHIP();
int printGPUsByCUDA();
+int printGPUsByLevelZero();
static int printAMD() {
#ifndef _WIN32
@@ -51,6 +54,7 @@ static int printAMD() {
}
static int printNVIDIA() { return printGPUsByCUDA(); }
+// Intel GPUs are listed via printGPUsByLevelZero(); its status is forwarded.
+static int printIntel() { return printGPUsByLevelZero(); }
int main(int argc, char *argv[]) {
cl::HideUnrelatedOptions(OffloadArchCategory);
@@ -73,15 +77,22 @@ int main(int argc, char *argv[]) {
sys::path::stem(argv[0]).starts_with("amdgpu-arch");
bool NVIDIAOnly = Only == VendorName::nvptx ||
sys::path::stem(argv[0]).starts_with("nvptx-arch");
+ bool IntelOnly = Only == VendorName::intel ||
+ sys::path::stem(argv[0]).starts_with("intelgpu-arch");
+ bool All = !AMDGPUOnly && !NVIDIAOnly && !IntelOnly;
int NVIDIAResult = 0;
- if (!AMDGPUOnly)
+ if (NVIDIAOnly || All)
NVIDIAResult = printNVIDIA();
int AMDResult = 0;
- if (!NVIDIAOnly)
+ if (AMDGPUOnly || All)
AMDResult = printAMD();
+ int IntelResult = 0;
+ if (IntelOnly || All)
+ IntelResult = printIntel();
+
// We only failed if all cases returned an error.
- return AMDResult && NVIDIAResult;
+ return AMDResult && NVIDIAResult && IntelResult;
}
>From e8880084aa18c161e217099b49fea62c6b95fb92 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 24 Sep 2025 19:44:21 +0200
Subject: [PATCH 2/4] cleanup
---
clang/tools/offload-arch/LevelZeroArch.cpp | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/clang/tools/offload-arch/LevelZeroArch.cpp b/clang/tools/offload-arch/LevelZeroArch.cpp
index 8b6b393f6f9f6..fae98d17240ed 100644
--- a/clang/tools/offload-arch/LevelZeroArch.cpp
+++ b/clang/tools/offload-arch/LevelZeroArch.cpp
@@ -13,19 +13,14 @@
#ifndef HAVE_LEVEL_ZERO_HEADERS
-int printGPUsByLevelZero() {
- return 0;
-}
+int printGPUsByLevelZero() { return 0; }
#else
-#include "clang/Basic/Version.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Error.h"
-#include <cstdint>
#include <cstdio>
-#include <memory>
#include <level_zero/ze_api.h>
using namespace llvm;
@@ -56,7 +51,7 @@ static bool loadLevelZero() {
ErrMsg = "unknown error";
if (Verbose)
llvm::errs() << "Unable to load library '" << L0Library << "': " << ErrMsg
- << "!\n";
+ << "\n";
return false;
}
@@ -73,8 +68,8 @@ static bool loadLevelZero() {
void *P = DynlibHandle->getAddressOfSymbol(entry.name);
if (P == nullptr) {
if (Verbose)
- llvm::errs() << "Unable to find '" << entry.name << "' in '" << L0Library
- << "'!\n";
+ llvm::errs() << "Unable to find '" << entry.name << "' in '"
+ << L0Library << "'\n";
return false;
}
*(entry.fptr) = P;
>From b4e3780a19c6f1477b582eacb1572c60ae92fc8b Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 24 Sep 2025 22:33:39 +0200
Subject: [PATCH 3/4] Refactor main
---
clang/tools/offload-arch/LevelZeroArch.cpp | 4 +-
clang/tools/offload-arch/OffloadArch.cpp | 50 ++++++++++++----------
2 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/clang/tools/offload-arch/LevelZeroArch.cpp b/clang/tools/offload-arch/LevelZeroArch.cpp
index fae98d17240ed..32080e8f8af7a 100644
--- a/clang/tools/offload-arch/LevelZeroArch.cpp
+++ b/clang/tools/offload-arch/LevelZeroArch.cpp
@@ -41,7 +41,7 @@ DEFINE_WRAPPER(zeDeviceGet)
DEFINE_WRAPPER(zeDeviceGetProperties)
static bool loadLevelZero() {
- const char *L0Library = "libze_loader.so";
+ constexpr const char *L0Library = "libze_loader.so";
std::string ErrMsg;
auto DynlibHandle = std::make_unique<llvm::sys::DynamicLibrary>(
@@ -114,10 +114,8 @@ int printGPUsByLevelZero() {
CALL_ZE_AND_CHECK(zeDeviceGet, driver, &deviceCount, devices.data());
for (auto device : devices) {
- // Get device properties
ze_device_properties_t deviceProperties;
CALL_ZE_AND_CHECK(zeDeviceGetProperties, device, &deviceProperties);
- // Print device name
llvm::outs() << deviceProperties.name << '\n';
}
}
diff --git a/clang/tools/offload-arch/OffloadArch.cpp b/clang/tools/offload-arch/OffloadArch.cpp
index 5043c00986a9b..0de0befda98e7 100644
--- a/clang/tools/offload-arch/OffloadArch.cpp
+++ b/clang/tools/offload-arch/OffloadArch.cpp
@@ -56,6 +56,17 @@ static int printAMD() {
static int printNVIDIA() { return printGPUsByCUDA(); }
static int printIntel() { return printGPUsByLevelZero(); }
+// Describes one GPU vendor backend known to the tool.
+struct vendor_entry_t {
+  // Vendor value compared against the vendor-selection option.
+  VendorName name;
+  // Executable-name prefix that selects this vendor when argv[0] starts with
+  // it.
+  llvm::StringRef alias;
+  // Prints the GPUs of this vendor and returns its status code.
+  int (*printFunc)();
+  // Written by main(): true when this vendor was explicitly selected.
+  bool onlyThis;
+};
+// Table of all supported vendors. It is mutable because main() updates
+// onlyThis in place, and static because it is only used within this file.
+static std::array<vendor_entry_t, 3> VendorTable{
+    {{VendorName::amdgpu, "amdgpu-arch", printAMD, false},
+     {VendorName::nvptx, "nvptx-arch", printNVIDIA, false},
+     {VendorName::intel, "intelgpu-arch", printIntel, false}}};
+
int main(int argc, char *argv[]) {
cl::HideUnrelatedOptions(OffloadArchCategory);
@@ -72,27 +83,20 @@ int main(int argc, char *argv[]) {
return 0;
}
- // If this was invoked from the legacy symlinks provide the same behavior.
- bool AMDGPUOnly = Only == VendorName::amdgpu ||
- sys::path::stem(argv[0]).starts_with("amdgpu-arch");
- bool NVIDIAOnly = Only == VendorName::nvptx ||
- sys::path::stem(argv[0]).starts_with("nvptx-arch");
- bool IntelOnly = Only == VendorName::intel ||
- sys::path::stem(argv[0]).starts_with("intelgpu-arch");
- bool All = !AMDGPUOnly && !NVIDIAOnly && !IntelOnly;
-
- int NVIDIAResult = 0;
- if (NVIDIAOnly || All)
- NVIDIAResult = printNVIDIA();
-
- int AMDResult = 0;
- if (AMDGPUOnly || All)
- AMDResult = printAMD();
-
- int IntelResult = 0;
- if (IntelOnly || All)
- IntelResult = printIntel();
-
- // We only failed if all cases returned an error.
- return AMDResult && NVIDIAResult && IntelResult;
+ bool All = true;
+ llvm::for_each(VendorTable, [&](auto &entry) {
+ entry.onlyThis =
+ entry.name == Only || sys::path::stem(argv[0]).starts_with(entry.alias);
+ if (entry.onlyThis)
+ All = false;
+ });
+
+ llvm::SmallVector<int> results(VendorTable.size());
+ llvm::transform(VendorTable, results.begin(), [&](const auto &entry) {
+ if (entry.onlyThis || All)
+ return entry.printFunc();
+ return 0;
+ });
+
+ return llvm::all_of(results, [](int r) { return r == 1; });
}
>From abba395aab57c3cb0fc606bea6a6dd12e594bbd4 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 24 Sep 2025 22:51:55 +0200
Subject: [PATCH 4/4] Inline level zero header
---
clang/tools/offload-arch/CMakeLists.txt | 6 --
clang/tools/offload-arch/LevelZeroArch.cpp | 70 +++++++++++++++++++---
2 files changed, 61 insertions(+), 15 deletions(-)
diff --git a/clang/tools/offload-arch/CMakeLists.txt b/clang/tools/offload-arch/CMakeLists.txt
index 187c4d9517551..5035022cb2678 100644
--- a/clang/tools/offload-arch/CMakeLists.txt
+++ b/clang/tools/offload-arch/CMakeLists.txt
@@ -3,12 +3,6 @@ set(LLVM_LINK_COMPONENTS Support)
add_clang_tool(offload-arch OffloadArch.cpp NVPTXArch.cpp AMDGPUArchByKFD.cpp
AMDGPUArchByHIP.cpp LevelZeroArch.cpp)
-find_path(OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR NAMES level_zero/ze_api.h)
-if (OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR)
- target_include_directories(offload-arch PRIVATE ${OFFLOAD_ARCH_LEVEL_ZERO_INCLUDE_DIR})
- target_compile_definitions(offload-arch PRIVATE HAVE_LEVEL_ZERO_HEADERS)
-endif()
-
add_clang_symlink(amdgpu-arch offload-arch)
add_clang_symlink(nvptx-arch offload-arch)
add_clang_symlink(intelgpu-arch offload-arch)
diff --git a/clang/tools/offload-arch/LevelZeroArch.cpp b/clang/tools/offload-arch/LevelZeroArch.cpp
index 32080e8f8af7a..3df244fe15502 100644
--- a/clang/tools/offload-arch/LevelZeroArch.cpp
+++ b/clang/tools/offload-arch/LevelZeroArch.cpp
@@ -11,17 +11,71 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HAVE_LEVEL_ZERO_HEADERS
-
-int printGPUsByLevelZero() { return 0; }
-
-#else
-
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Error.h"
#include <cstdio>
-#include <level_zero/ze_api.h>
+
+// Local copies of the few Level Zero declarations this file uses, replacing
+// the former #include of <level_zero/ze_api.h>.
+// NOTE(review): values and layouts are presumably meant to mirror the official
+// header and must be kept in sync with it - confirm against ze_api.h.
+
+// Sizes of the fixed arrays embedded in the device structures below.
+#define ZE_MAX_DEVICE_NAME 256
+#define ZE_MAX_DEVICE_UUID_SIZE 16
+
+// Opaque driver and device handles.
+typedef void *ze_driver_handle_t;
+typedef void *ze_device_handle_t;
+
+// Result codes; only the two values referenced in this file are listed.
+enum ze_result_t {
+ ZE_RESULT_SUCCESS = 0,
+ ZE_RESULT_ERROR_UNKNOWN = 0x7ffffffe
+};
+
+// Structure type tags stored in the `stype` member of Level Zero descriptors.
+// ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES is listed because this file declares
+// ze_device_properties_t, whose stype has to carry this tag when the structure
+// is handed to zeDeviceGetProperties.
+enum ze_structure_type_t {
+  ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x3,
+  ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC = 0x00020021,
+  ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
+};
+
+// Driver-type selection flag; only the GPU flag is used by this file.
+enum ze_init_driver_type_flags_t { ZE_INIT_DRIVER_TYPE_FLAG_GPU = 1 };
+
+// Plain 32-bit stand-ins for the device type and property flag types.
+typedef uint32_t ze_device_type_t;
+typedef uint32_t ze_device_property_flags_t;
+
+// Descriptor passed to zeInitDrivers to select which driver types to
+// initialize.
+struct ze_init_driver_type_desc_t {
+ ze_structure_type_t stype;
+ const void *pNext;
+ ze_init_driver_type_flags_t flags;
+};
+
+// Device UUID, embedded in ze_device_properties_t.
+typedef struct _ze_device_uuid_t {
+ uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
+} ze_device_uuid_t;
+
+// Device properties filled in by zeDeviceGetProperties. This file only reads
+// `name`; the other members are declared so that `name` sits at the right
+// offset.
+// NOTE(review): member order and types are assumed to match the official
+// header - verify against ze_api.h.
+typedef struct _ze_device_properties_t {
+ ze_structure_type_t stype;
+ void *pNext;
+ ze_device_type_t type;
+ uint32_t vendorId;
+ uint32_t deviceId;
+ ze_device_property_flags_t flags;
+ uint32_t subdeviceId;
+ uint32_t coreClockRate;
+ uint64_t maxMemAllocSize;
+ uint32_t maxHardwareContexts;
+ uint32_t maxCommandQueuePriority;
+ uint32_t numThreadsPerEU;
+ uint32_t physicalEUSimdWidth;
+ uint32_t numEUsPerSubslice;
+ uint32_t numSubslicesPerSlice;
+ uint32_t numSlices;
+ uint64_t timerResolution;
+ uint32_t timestampValidBits;
+ uint32_t kernelTimestampValidBits;
+ ze_device_uuid_t uuid;
+ char name[ZE_MAX_DEVICE_NAME];
+} ze_device_properties_t;
+
+// Prototypes of the entry points used by this file. DEFINE_WRAPPER applies
+// decltype to these names to obtain the function types; the addresses
+// themselves are looked up in the loader library at run time.
+ze_result_t zeInitDrivers(uint32_t *pCount, ze_driver_handle_t *phDrivers,
+ ze_init_driver_type_desc_t *desc);
+ze_result_t zeDeviceGet(ze_driver_handle_t hDriver, uint32_t *pCount,
+ void *phDevices);
+ze_result_t zeDeviceGetProperties(void *hDevice, void *pProperties);
using namespace llvm;
extern cl::opt<bool> Verbose;
@@ -122,5 +176,3 @@ int printGPUsByLevelZero() {
return 0;
}
-
-#endif // HAVE_LEVEL_ZERO_HEADERS
More information about the cfe-commits
mailing list