[llvm] [OFFLOAD] Add plugin with support for Intel oneAPI Level Zero (PR #158900)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 13:20:27 PDT 2025
================
@@ -0,0 +1,682 @@
+//===--- Level Zero Target RTL Implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// GenericDevice instantiation for SPIR-V/Xe machine
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
+#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
+
+#include "llvm/ADT/SmallVector.h"
+
+#include "PerThreadTable.h"
+
+#include "AsyncQueue.h"
+#include "L0Context.h"
+#include "L0Program.h"
+#include "PluginInterface.h"
+#include "TLS.h"
+
+namespace llvm {
+namespace omp {
+namespace target {
+namespace plugin {
+
+using OmpInteropTy = omp_interop_val_t *;
+class LevelZeroPluginTy;
+
+// clang-format off
+enum class PCIIdTy : int32_t { // Named PCI id constants stored as int32_t. NOTE(review): the enumerators are presumably Intel GPU platform codenames keyed by a (masked) PCI device id — confirm against the code that consumes them.
+ None = 0x0000,
+ SKL = 0x1900,
+ KBL = 0x5900,
+ CFL = 0x3E00,
+ CFL_2 = 0x9B00,
+ ICX = 0x8A00,
+ TGL = 0xFF20,
+ TGL_2 = 0x9A00,
+ DG1 = 0x4900,
+ RKL = 0x4C00,
+ ADLS = 0x4600,
+ RTL = 0xA700, // NOTE(review): possibly intended to be spelled RPL — confirm before other code starts depending on this name.
+ MTL = 0x7D00,
+ PVC = 0x0B00,
+ DG2_ATS_M = 0x4F00,
+ DG2_ATS_M_2 = 0x5600,
+ LNL = 0x6400,
+ BMG = 0xE200,
+};
+
+/// Device type enumeration common to compiler and runtime. Apart from DeviceArch_None (0), every enumerator is a distinct single-bit value.
+enum class DeviceArchTy : uint64_t {
+ DeviceArch_None = 0,
+ DeviceArch_Gen = 0x0001, // Gen 9, Gen 11 or Xe
+ DeviceArch_XeLPG = 0x0002,
+ DeviceArch_XeHPC = 0x0004,
+ DeviceArch_XeHPG = 0x0008,
+ DeviceArch_Xe2LP = 0x0010,
+ DeviceArch_Xe2HP = 0x0020,
+ DeviceArch_x86_64 = 0x0100 // NOTE(review): single-bit values suggest these may be OR-ed into a mask — confirm with the users of this enum.
+};
+// clang-format on
+
+struct L0DeviceIdTy { // Pairs a Level Zero device handle with three integer ids (root, sub, CCS).
+ ze_device_handle_t zeId; // Level Zero device handle passed to the constructor.
+ int32_t RootId; // NOTE(review): presumably the index of the root device — confirm.
+ int32_t SubId; // -1 unless the constructor is given a value; presumably a sub-device index — confirm.
+ int32_t CCSId; // -1 unless the constructor is given a value; presumably a compute command streamer index — confirm.
+
+ L0DeviceIdTy(ze_device_handle_t Device, int32_t RootId, int32_t SubId = -1, // Stores all four arguments unchanged.
+ int32_t CCSId = -1)
+ : zeId(Device), RootId(RootId), SubId(SubId), CCSId(CCSId) {}
+};
+
+/// Bundle of Level Zero command list and command queue handles kept for one
+/// device (L0DeviceTLSTableTy stores these objects in a per-thread container).
+///
+/// The class is move-only. Destruction does not release anything: clear() has
+/// to be called beforehand, and the destructor asserts that every handle is
+/// already null.
+class L0DeviceTLSTy {
+  /// Command list for each device
+  ze_command_list_handle_t CmdList = nullptr;
+
+  /// Main copy command list for each device
+  ze_command_list_handle_t CopyCmdList = nullptr;
+
+  /// Link copy command list for each device
+  ze_command_list_handle_t LinkCopyCmdList = nullptr;
+
+  /// Command queue for each device
+  ze_command_queue_handle_t CmdQueue = nullptr;
+
+  /// Main copy command queue for each device
+  ze_command_queue_handle_t CopyCmdQueue = nullptr;
+
+  /// Link copy command queues for each device
+  ze_command_queue_handle_t LinkCopyCmdQueue = nullptr;
+
+  /// Immediate command list for each device
+  ze_command_list_handle_t ImmCmdList = nullptr;
+
+  /// Immediate copy command list for each device
+  ze_command_list_handle_t ImmCopyCmdList = nullptr;
+
+public:
+  L0DeviceTLSTy() = default;
+
+  /// Only checks (in assert-enabled builds) that every handle is already
+  /// null; the destructor never destroys a handle itself.
+  ~L0DeviceTLSTy() {
+    assert(CmdList == nullptr && "CmdList is not nullptr on destruction");
+    assert(CopyCmdList == nullptr &&
+           "CopyCmdList is not nullptr on destruction");
+    assert(LinkCopyCmdList == nullptr &&
+           "LinkCopyCmdList is not nullptr on destruction");
+    assert(CmdQueue == nullptr && "CmdQueue is not nullptr on destruction");
+    assert(CopyCmdQueue == nullptr &&
+           "CopyCmdQueue is not nullptr on destruction");
+    assert(LinkCopyCmdQueue == nullptr &&
+           "LinkCopyCmdQueue is not nullptr on destruction");
+    assert(ImmCmdList == nullptr && "ImmCmdList is not nullptr on destruction");
+    assert(ImmCopyCmdList == nullptr &&
+           "ImmCopyCmdList is not nullptr on destruction");
+  }
+
+  L0DeviceTLSTy(const L0DeviceTLSTy &) = delete;
+  L0DeviceTLSTy &operator=(const L0DeviceTLSTy &) = delete;
+  L0DeviceTLSTy &operator=(L0DeviceTLSTy &&) = delete;
+
+  /// Takes over every handle from \p Other and leaves \p Other holding only
+  /// null handles, so the moved-from object passes the destructor's checks.
+  /// Exchanging raw handles cannot throw, hence noexcept (this also lets
+  /// std::vector relocate elements without giving up exception guarantees).
+  L0DeviceTLSTy(L0DeviceTLSTy &&Other) noexcept
+      : CmdList(std::exchange(Other.CmdList, nullptr)),
+        CopyCmdList(std::exchange(Other.CopyCmdList, nullptr)),
+        LinkCopyCmdList(std::exchange(Other.LinkCopyCmdList, nullptr)),
+        CmdQueue(std::exchange(Other.CmdQueue, nullptr)),
+        CopyCmdQueue(std::exchange(Other.CopyCmdQueue, nullptr)),
+        LinkCopyCmdQueue(std::exchange(Other.LinkCopyCmdQueue, nullptr)),
+        ImmCmdList(std::exchange(Other.ImmCmdList, nullptr)),
+        ImmCopyCmdList(std::exchange(Other.ImmCopyCmdList, nullptr)) {}
+
+  /// Destroys every non-null command list, then every non-null command queue,
+  /// and finally resets all handles to null.
+  /// NOTE(review): CALL_ZE_EXIT_FAIL is defined elsewhere; judging by its name
+  /// it presumably terminates when the Level Zero call fails — confirm.
+  void clear() {
+    if (CmdList)
+      CALL_ZE_EXIT_FAIL(zeCommandListDestroy, CmdList);
+    if (CopyCmdList)
+      CALL_ZE_EXIT_FAIL(zeCommandListDestroy, CopyCmdList);
+    if (LinkCopyCmdList)
+      CALL_ZE_EXIT_FAIL(zeCommandListDestroy, LinkCopyCmdList);
+    if (ImmCmdList)
+      CALL_ZE_EXIT_FAIL(zeCommandListDestroy, ImmCmdList);
+    if (ImmCopyCmdList)
+      CALL_ZE_EXIT_FAIL(zeCommandListDestroy, ImmCopyCmdList);
+    if (CmdQueue)
+      CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CmdQueue);
+    if (CopyCmdQueue)
+      CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, CopyCmdQueue);
+    if (LinkCopyCmdQueue)
+      CALL_ZE_EXIT_FAIL(zeCommandQueueDestroy, LinkCopyCmdQueue);
+
+    CmdList = nullptr;
+    CopyCmdList = nullptr;
+    LinkCopyCmdList = nullptr;
+    CmdQueue = nullptr;
+    CopyCmdQueue = nullptr;
+    LinkCopyCmdQueue = nullptr;
+    ImmCmdList = nullptr;
+    ImmCopyCmdList = nullptr;
+  }
+
+  // Plain accessors. The setters overwrite the stored handle without
+  // destroying the previous one. Parameter names avoid the leading
+  // underscore + uppercase form, which is reserved for the implementation.
+
+  auto getCmdList() const { return CmdList; }
+  void setCmdList(ze_command_list_handle_t NewCmdList) { CmdList = NewCmdList; }
+
+  auto getCopyCmdList() const { return CopyCmdList; }
+  void setCopyCmdList(ze_command_list_handle_t NewCopyCmdList) {
+    CopyCmdList = NewCopyCmdList;
+  }
+
+  auto getLinkCopyCmdList() const { return LinkCopyCmdList; }
+  void setLinkCopyCmdList(ze_command_list_handle_t NewLinkCopyCmdList) {
+    LinkCopyCmdList = NewLinkCopyCmdList;
+  }
+
+  auto getImmCmdList() const { return ImmCmdList; }
+  void setImmCmdList(ze_command_list_handle_t NewImmCmdList) {
+    ImmCmdList = NewImmCmdList;
+  }
+
+  auto getImmCopyCmdList() const { return ImmCopyCmdList; }
+  void setImmCopyCmdList(ze_command_list_handle_t NewImmCopyCmdList) {
+    ImmCopyCmdList = NewImmCopyCmdList;
+  }
+
+  auto getCmdQueue() const { return CmdQueue; }
+  void setCmdQueue(ze_command_queue_handle_t NewCmdQueue) {
+    CmdQueue = NewCmdQueue;
+  }
+
+  auto getCopyCmdQueue() const { return CopyCmdQueue; }
+  void setCopyCmdQueue(ze_command_queue_handle_t NewCopyCmdQueue) {
+    CopyCmdQueue = NewCopyCmdQueue;
+  }
+
+  auto getLinkCopyCmdQueue() const { return LinkCopyCmdQueue; }
+  void setLinkCopyCmdQueue(ze_command_queue_handle_t NewLinkCopyCmdQueue) {
+    LinkCopyCmdQueue = NewLinkCopyCmdQueue;
+  }
+};
+
+struct L0DeviceTLSTableTy // PerThreadContainer specialization holding std::vector<L0DeviceTLSTy>.
+ : public PerThreadContainer<std::vector<L0DeviceTLSTy>, 8> {
+ void clear() { // Hands the qualified clear() a callback that calls L0DeviceTLSTy::clear() on the entry it receives.
+ PerThreadTable::clear([](L0DeviceTLSTy &Entry) { Entry.clear(); });
+ } // NOTE(review): the call is qualified with PerThreadTable while the declared base is PerThreadContainer — confirm this names the intended base class.
+};
+
+class L0DeviceTy final : public GenericDeviceTy {
+ // Level Zero Context for this Device
+ L0ContextTy &l0Context;
+
+ // Level Zero handle for this Device
+ ze_device_handle_t zeDevice;
+ // Device Properties
+ ze_device_properties_t DeviceProperties{};
+ ze_device_compute_properties_t ComputeProperties{};
+ ze_device_memory_properties_t MemoryProperties{};
+ ze_device_cache_properties_t CacheProperties{};
+
+ /// Devices' default target allocation kind for internal allocation
+ int32_t AllocKind = TARGET_ALLOC_DEVICE;
+
+ DeviceArchTy DeviceArch = DeviceArchTy::DeviceArch_None;
+
+ std::string DeviceName;
+
+ /// Common indirect access flags for this device
+ ze_kernel_indirect_access_flags_t IndirectAccessFlags = 0;
+
+ /// Device UUID for toplevel devices only
+ std::string DeviceUuid;
+
+ /// L0 Device ID as string
+ std::string zeId;
+
+ /// Command queue group ordinals for each device
+ std::pair<uint32_t, uint32_t> ComputeOrdinal{UINT32_MAX, 0};
+ /// Command queue group ordinals for copying
+ std::pair<uint32_t, uint32_t> CopyOrdinal{UINT32_MAX, 0};
+ /// Command queue group ordinals and number of queues for link copy engines
+ std::pair<uint32_t, uint32_t> LinkCopyOrdinal{UINT32_MAX, 0};
+
+ /// Command queue index for each device
+ uint32_t ComputeIndex = 0;
+
+ bool IsAsyncEnabled = false;
+
+ // lock for this device
+ std::mutex Mutex;
+
+ /// Contains all modules (possibly from multiple device images) to handle
+ /// dynamic link across multiple images
+ llvm::SmallVector<ze_module_handle_t> GlobalModules;
+
+ /// L0 programs created for this device
+ std::list<L0ProgramTy> Programs;
+
+ /// MemAllocator for this device
+ MemAllocatorTy MemAllocator;
+
+ /// The current size of the global device memory pool (managed by us).
+ uint64_t HeapSize = 1L << 23L /*8MB=*/;
+
+ int32_t synchronize(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true);
+ int32_t submitData(void *TgtPtr, const void *HstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfo);
+ int32_t retrieveData(void *HstPtr, const void *TgtPtr, int64_t Size,
+ __tgt_async_info *AsyncInfo);
+
+ bool shouldSetupDeviceMemoryPool() const override { return false; }
+ DeviceArchTy computeArch() const;
+
+ /// Get default compute group ordinal. Returns Ordinal-NumQueues pair
+ std::pair<uint32_t, uint32_t> findComputeOrdinal();
+
+ /// Get copy command queue group ordinal. Returns Ordinal-NumQueues pair
+ std::pair<uint32_t, uint32_t> findCopyOrdinal(bool LinkCopy = false);
+
+ Error internalInit();
+
+public:
+ L0DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
+ ze_device_handle_t zeDevice, L0ContextTy &DriverInfo,
+ const std::string &zeId, int32_t ComputeIndex)
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, {}),
+ l0Context(DriverInfo), zeDevice(zeDevice), zeId(zeId),
+ ComputeIndex(ComputeIndex) {
+ DeviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
+ DeviceProperties.pNext = nullptr;
+ ComputeProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES;
+ ComputeProperties.pNext = nullptr;
+ MemoryProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
+ MemoryProperties.pNext = nullptr;
+ CacheProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES;
+ CacheProperties.pNext = nullptr;
+
+ auto Err = internalInit();
+ if (Err) {
+ FATAL_MESSAGE(DeviceId, "Couldn't initialize device: %s\n",
+ toString(std::move(Err)).c_str());
+ }
----------------
jhuber6 wrote:
```suggestion
if (Err)
FATAL_MESSAGE(DeviceId, "Couldn't initialize device: %s\n",
toString(std::move(Err)).c_str());
```
The same applies elsewhere: omit the braces around single-statement bodies.
https://github.com/llvm/llvm-project/pull/158900
More information about the llvm-commits
mailing list