[llvm] [OFFLOAD] Add plugin with support for Intel oneAPI Level Zero (PR #158900)
Alex Duran via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 02:50:07 PDT 2025
================
@@ -0,0 +1,278 @@
+//===--- Target RTLs Implementation ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// RTL for SPIR-V/Xe machine
+//
+//===----------------------------------------------------------------------===//
+
+#include <level_zero/zes_api.h>
+
+#include "L0Device.h"
+#include "L0Interop.h"
+#include "L0Kernel.h"
+#include "L0Plugin.h"
+#include "L0Trace.h"
+
+namespace llvm::omp::target::plugin {
+
+using namespace llvm::omp::target;
+using namespace error;
+
+#pragma clang diagnostic ignored "-Wglobal-constructors"
+// Common data across all possible plugin instantiations
+L0OptionsTy LevelZeroPluginTy::Options;
+
+int32_t LevelZeroPluginTy::findDevices() {
+ CALL_ZE_RET_ZERO(zeInit, ZE_INIT_FLAG_GPU_ONLY);
+ uint32_t NumDrivers = 0;
+ CALL_ZE_RET_ZERO(zeDriverGet, &NumDrivers, nullptr);
+ if (NumDrivers == 0) {
+ DP("Cannot find any drivers.\n");
+ return 0;
+ }
+
+ // We expect multiple drivers on Windows to support different device types,
+ // so we need to maintain multiple drivers and contexts in general.
+ llvm::SmallVector<ze_driver_handle_t> FoundDrivers(NumDrivers);
+ CALL_ZE_RET_ZERO(zeDriverGet, &NumDrivers, FoundDrivers.data());
+
+ struct RootInfoTy {
+ uint32_t OrderId;
+ ze_device_handle_t zeDevice;
+ L0ContextTy *Driver;
+ bool IsDiscrete;
+ };
+ llvm::SmallVector<RootInfoTy> RootDevices;
+
+ uint32_t OrderId = 0;
+ for (uint32_t DriverId = 0; DriverId < NumDrivers; DriverId++) {
+ const auto &Driver = FoundDrivers[DriverId];
+ uint32_t DeviceCount = 0;
+ ze_result_t RC;
+ CALL_ZE(RC, zeDeviceGet, Driver, &DeviceCount, nullptr);
+ if (RC != ZE_RESULT_SUCCESS || DeviceCount == 0) {
+ DP("Cannot find any devices from driver " DPxMOD ".\n", DPxPTR(Driver));
+ continue;
+ }
+ // We have a driver that supports at least one device
+ ContextList.emplace_back(*this, Driver, DriverId);
+ auto &DrvInfo = ContextList.back();
+ llvm::SmallVector<ze_device_handle_t> FoundDevices(DeviceCount);
+ CALL_ZE_RET_ZERO(zeDeviceGet, Driver, &DeviceCount, FoundDevices.data());
+
+ for (auto &zeDevice : FoundDevices)
+ RootDevices.push_back(
+ {OrderId++, zeDevice, &DrvInfo, L0DeviceTy::isDiscrete(zeDevice)});
+ }
+
+ // move discrete devices to the front
+ std::sort(RootDevices.begin(), RootDevices.end(),
+ [](const RootInfoTy &A, const RootInfoTy &B) {
+ // if both are discrete, order by OrderId
+ // if both are not discrete, order by OrderId
+ // Otherwise, discrete goes first
+
+ if (A.IsDiscrete && B.IsDiscrete)
+ return A.OrderId < B.OrderId;
+ if (!A.IsDiscrete && !B.IsDiscrete)
+ return A.OrderId < B.OrderId;
+ return A.IsDiscrete;
+ });
+
+ struct DeviceInfoTy {
+ L0DeviceIdTy Id;
+ L0ContextTy *Driver;
+ bool isRoot() const { return Id.SubId < 0 && Id.CCSId < 0; }
+ };
+
+ llvm::SmallVector<DeviceInfoTy> DevicesToAdd;
+
+ // helper lambda
+ auto addDevice = [&DevicesToAdd](auto &zeDevice, auto *Driver, int32_t RootId,
+ int32_t SubId = -1, int32_t CCSId = -1) {
+ DevicesToAdd.push_back({{zeDevice, RootId, SubId, CCSId}, Driver});
+ };
+ for (size_t RootId = 0; RootId < RootDevices.size(); RootId++) {
+ const auto zeDevice = RootDevices[RootId].zeDevice;
+ auto *RootDriver = RootDevices[RootId].Driver;
+ addDevice(zeDevice, RootDriver, RootId);
+ }
+ NumDevices = DevicesToAdd.size();
+ auto DeviceId = 0;
+ for (auto &DeviceInfo : DevicesToAdd) {
+ auto RootId = DeviceInfo.Id.RootId;
+ auto SubId = DeviceInfo.Id.SubId;
+ auto CCSId = DeviceInfo.Id.CCSId;
+ auto zeDevice = DeviceInfo.Id.zeId;
+ auto *Driver = DeviceInfo.Driver;
+
+ std::string IdStr = std::to_string(RootId) +
+ (SubId < 0 ? "" : "." + std::to_string(SubId)) +
+ (CCSId < 0 ? "" : "." + std::to_string(CCSId));
+
+ L0Devices.push_back(new L0DeviceTy(*this, DeviceId, getNumRootDevices(),
+ zeDevice, *Driver, std::move(IdStr),
+ CCSId < 0 ? 0 : CCSId /* ComputeIndex */
+ ));
+ DeviceId++;
+ }
+
+ DP("Found %" PRIu32 " root devices, %" PRIu32 " total devices.\n",
+ getNumRootDevices(), NumDevices);
+ DP("List of devices (DeviceID[.SubID[.CCSID]])\n");
+ for (auto &l0Device : L0Devices) {
+ DP("-- %s\n", l0Device->getZeIdCStr());
+ (void)l0Device; // silence warning
+ }
+
+ if (getDebugLevel() > 0) {
+ DP("Root Device Information\n");
+ for (uint32_t I = 0; I < getNumRootDevices(); I++) {
+ auto &l0Device = getDeviceFromId(I);
+ l0Device.reportDeviceInfo();
+ }
+ }
+
+ return getNumRootDevices();
+}
+
+/// Clean-up routine to be invoked by the destructor or
+/// LevelZeroPluginTy::deinit.
+void LevelZeroPluginTy::closeRTL() {
+
+ ContextTLSTable.clear();
+ DeviceTLSTable.clear();
+ ThreadTLSTable.clear();
+ ContextList.clear();
+
+ DP("Plugin closed successfully\n");
+}
+
+Expected<int32_t> LevelZeroPluginTy::initImpl() {
+ DP("Level0 NG plugin initialization\n");
+ // process options before anything else
+ Options.init();
+ return findDevices();
+}
+
+Error LevelZeroPluginTy::deinitImpl() {
+ DP("Deinit Level0 plugin!\n");
+ closeRTL();
+ return Plugin::success();
+}
+
+GenericDeviceTy *LevelZeroPluginTy::createDevice(GenericPluginTy &Plugin,
+ int32_t DeviceId,
+ int32_t NumDevices) {
+ return &getDeviceFromId(DeviceId);
+}
+
+GenericGlobalHandlerTy *LevelZeroPluginTy::createGlobalHandler() {
+ return new L0GlobalHandlerTy();
+}
+
+uint16_t LevelZeroPluginTy::getMagicElfBits() const { return ELF::EM_INTELGT; }
+
+Triple::ArchType LevelZeroPluginTy::getTripleArch() const {
+ return Triple::spirv64;
+}
+
+const char *LevelZeroPluginTy::getName() const { return GETNAME(TARGET_NAME); }
+
+Error LevelZeroPluginTy::flushQueueImpl(omp_interop_val_t *Interop) {
+ return Plugin::success();
+}
+
+Expected<bool> LevelZeroPluginTy::isELFCompatible(uint32_t DeviceId,
+ StringRef Image) const {
+ uint64_t MajorVer, MinorVer;
+ return isValidOneOmpImage(Image, MajorVer, MinorVer);
+}
+
+Error LevelZeroPluginTy::syncBarrierImpl(omp_interop_val_t *Interop) {
+ if (!Interop) {
+ return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Invalid/inconsistent OpenMP interop " DPxMOD "\n",
+ DPxPTR(Interop));
+ }
+ if (!Interop->async_info || !Interop->async_info->Queue)
+ return Plugin::success();
+
+ // L0 object
+ const auto L0 = static_cast<L0Interop::Property *>(Interop->rtl_property);
+ const auto device_id = Interop->device_id;
+ auto &l0Device = getDeviceFromId(device_id);
+
+ // We can synchronize both L0 & SYCL objects with the same ze command
+ if (l0Device.useImmForInterop()) {
+ DP("LevelZeroPluginTy::sync_barrier: Synchronizing " DPxMOD
+ " with ImmCmdList barrier\n",
+ DPxPTR(Interop));
+ auto ImmCmdList = L0->ImmCmdList;
+ auto Event = l0Device.getEvent();
+
+ CALL_ZE_RET_ERROR(zeCommandListAppendBarrier, ImmCmdList, Event, 0,
+ nullptr);
+ CALL_ZE_RET_ERROR(zeEventHostSynchronize, Event, UINT64_MAX);
+ l0Device.releaseEvent(Event);
+ } else {
+ DP("LevelZeroPluginTy::sync_barrier: Synchronizing " DPxMOD
+ " with queue synchronize\n",
+ DPxPTR(Interop));
+ auto CmdQueue = L0->CommandQueue;
+ CALL_ZE_RET_ERROR(zeCommandQueueSynchronize, CmdQueue, UINT64_MAX);
+ }
+
+ return Plugin::success();
+}
+
+Error LevelZeroPluginTy::asyncBarrierImpl(omp_interop_val_t *Interop) {
+ if (!Interop) {
+ return Plugin::error(ErrorCode::INVALID_ARGUMENT,
+ "Invalid/inconsistent OpenMP interop " DPxMOD "\n",
+ DPxPTR(Interop));
+ }
+ if (!Interop->async_info || !Interop->async_info->Queue)
+ return Plugin::success();
+
+ const auto L0 = static_cast<L0Interop::Property *>(Interop->rtl_property);
+ const auto device_id = Interop->device_id;
+ if (Interop->attrs.inorder)
+ return Plugin::success();
+
+ auto &l0Device = getDeviceFromId(device_id);
+ if (l0Device.useImmForInterop()) {
+ DP("LevelZeroPluginTy::async_barrier: Appending ImmCmdList barrier "
+ "to " DPxMOD "\n",
+ DPxPTR(Interop));
+ auto ImmCmdList = L0->ImmCmdList;
+ CALL_ZE_RET_ERROR(zeCommandListAppendBarrier, ImmCmdList, nullptr, 0,
+ nullptr);
+ } else {
+ DP("LevelZeroPluginTy::async_barrier: Appending CmdList barrier to " DPxMOD
+ "\n",
+ DPxPTR(Interop));
+ auto CmdQueue = L0->CommandQueue;
+ ze_command_list_handle_t CmdList = l0Device.getCmdList();
+ CALL_ZE_RET_ERROR(zeCommandListAppendBarrier, CmdList, nullptr, 0, nullptr);
+ CALL_ZE_RET_ERROR(zeCommandListClose, CmdList);
+ CALL_ZE_RET_ERROR(zeCommandQueueExecuteCommandLists, CmdQueue, 1, &CmdList,
+ nullptr);
+ CALL_ZE_RET_ERROR(zeCommandListReset, CmdList);
----------------
adurang wrote:
I don't think there's an easy way to delay the reset right now, so I switched to the syncBarrier implementation for this case (which is rarely used tbh).
https://github.com/llvm/llvm-project/pull/158900
More information about the llvm-commits
mailing list