[llvm] [OFFLOAD] Add plugin with support for Intel oneAPI Level Zero (PR #158900)
Alexey Sachkov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 06:23:40 PDT 2025
================
@@ -0,0 +1,371 @@
+//===--- Level Zero Target RTL Implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Level Zero RTL Options support
+//
+//===----------------------------------------------------------------------===//
+
+#include "omptarget.h"
+
+#include "L0Defs.h"
+#include "L0Options.h"
+#include "L0Trace.h"
+
+namespace llvm::omp::target::plugin {
+
+/// Tell whether the device identified by (RootID, SubID, CCSID) is selected by
+/// the entries recorded from ONEAPI_DEVICE_SELECTOR.
+///
+/// ExplicitRootDevices is scanned front to back and the first entry matching
+/// all three IDs decides the result: its accept/discard flag is returned.
+/// In an entry, -2 stands for "all" and -1 for "ignore". A root wildcard
+/// matches every RootID; a sub-device or CCS wildcard matches every queried ID
+/// except -1; any other entry value has to equal the queried ID.
+/// Returns false when no entry matches (including when the list is empty).
+bool L0OptionsTy::shouldAddDevice(int32_t RootID, int32_t SubID,
+                                  int32_t CCSID) const {
+  for (const auto &[Accept, WantRoot, WantSub, WantCCS] : ExplicitRootDevices) {
+    const bool RootMatches = (WantRoot == -2) || (WantRoot == RootID);
+    const bool SubMatches =
+        (WantSub == SubID) || ((WantSub == -2) && (SubID != -1));
+    const bool CCSMatches =
+        (WantCCS == CCSID) || ((WantCCS == -2) && (CCSID != -1));
+    // The first entry that matches on every level wins.
+    if (RootMatches && SubMatches && CCSMatches)
+      return Accept;
+  }
+  return false;
+}
+
+/// Read environment variables
+///
+/// Updates the plugin options from the process environment:
+///  - LIBOMPTARGET_LEVEL_ZERO_COMPILATION_OPTIONS is appended to
+///    UserCompilationOptions.
+///  - ONEAPI_DEVICE_SELECTOR is parsed into ExplicitRootDevices.
+///  - LIBOMPTARGET_LEVEL_ZERO_MEMORY_POOL updates Flags.UseMemoryPool and
+///    MemPoolInfo.
+///  - INTEL_ENABLE_OFFLOAD_ANNOTATIONS adds a constant to CommonSpecConstants.
+///  - LIBOMPTARGET_LEVEL_ZERO_STAGING_BUFFER_SIZE sets StagingBufferSize.
+///  - LIBOMPTARGET_LEVEL_ZERO_COMMAND_MODE sets CommandMode.
+/// Parse problems are reported through FAILURE_MESSAGE / DP / INVALID_OPTION.
+void L0OptionsTy::processEnvironmentVars() {
+  // Compilation options for IGC
+  UserCompilationOptions +=
+      std::string(" ") +
+      StringEnvar("LIBOMPTARGET_LEVEL_ZERO_COMPILATION_OPTIONS", "").get();
+
+  // Explicit Device mode if ONEAPI_DEVICE_SELECTOR is set
+  const StringEnvar DeviceSelectorVar("ONEAPI_DEVICE_SELECTOR", "");
+  if (DeviceSelectorVar.isPresent()) {
+    // Lower-cased private copy of the selector; it is the string handed to
+    // tokenize() below.
+    std::string EnvStr(DeviceSelectorVar.get());
+    uint32_t numDiscard = 0;
+    std::transform(EnvStr.begin(), EnvStr.end(), EnvStr.begin(),
+                   [](unsigned char C) { return std::tolower(C); });
+
+    // A string_view token is not NUL-terminated at its own end, so tokens are
+    // printed with "%.*s" and an explicit length instead of "%s".
+    auto Len = [](std::string_view S) { return static_cast<int>(S.size()); };
+
+    std::vector<std::string_view> Entries = tokenize(EnvStr, ";", true);
+    for (const auto &Term : Entries) {
+      std::vector<std::string_view> Pair = tokenize(Term, ":", true);
+      if (Pair.empty()) {
+        FAILURE_MESSAGE(
+            "Incomplete selector! Pair and device must be specified.\n");
+        // Nothing to index into; move on to the next term.
+        continue;
+      }
+      if (Pair.size() == 1) {
+        FAILURE_MESSAGE("Incomplete selector! Try '%.*s:*'if all devices "
+                        "under the Pair was original intention.\n",
+                        Len(Pair[0]), Pair[0].data());
+        // There is no device part (Pair[1]) to parse.
+        continue;
+      }
+      if (Pair.size() > 2) {
+        FAILURE_MESSAGE(
+            "Error parsing selector string \"%.*s\" Too many colons (:)\n",
+            Len(Term), Term.data());
+      }
+
+      // Only terms addressed to this backend (or to every backend) matter.
+      // Terms for other backends are skipped without dropping later terms.
+      const std::string_view Backend = Pair[0];
+      const bool IsAnyBackend = !Backend.empty() && Backend.front() == '*';
+      if (!IsAnyBackend && Backend != "level_zero" && Backend != "!level_zero")
+        continue;
+
+      // A leading '!' marks a discarding (negative) filter.
+      const bool isDiscard = Backend.front() == '!';
+      if (isDiscard)
+        numDiscard++;
+      else if (numDiscard > 0)
+        FAILURE_MESSAGE("All negative(discarding) filters must appear after "
+                        "all positive(accepting) filters!");
+
+      std::vector<std::string_view> Targets = tokenize(Pair[1], ",", true);
+      for (const auto &TargetStr : Targets) {
+        bool HasDeviceWildCard = false;
+        bool HasSubDeviceWildCard = false;
+        bool DeviceNum = false;
+        std::vector<std::string_view> DeviceSubTuple =
+            tokenize(TargetStr, ".", true);
+        // {device, sub-device, sub-sub-device}: -2 is a wildcard, -1 means
+        // the level was not given.
+        int32_t RootD[3] = {-1, -1, -1};
+        if (DeviceSubTuple.empty()) {
+          FAILURE_MESSAGE(
+              "ONEAPI_DEVICE_SELECTOR parsing error. Device must be "
+              "specified.");
+          // No device token to look at.
+          continue;
+        }
+
+        const std::string_view TopDeviceStr = DeviceSubTuple[0];
+        static constexpr std::array<std::string_view, 6> DeviceTypes = {
+            "host", "cpu", "gpu", "acc", "fpga", "*"};
+        const bool IsDeviceType =
+            std::find(DeviceTypes.begin(), DeviceTypes.end(), TopDeviceStr) !=
+            DeviceTypes.end();
+        if (IsDeviceType) {
+          if (TopDeviceStr == "*") {
+            HasDeviceWildCard = true;
+            RootD[0] = -2;
+          } else if (TopDeviceStr == "gpu") {
+            // No entry is recorded for a plain "gpu" target.
+            continue;
+          }
+        } else {
+          std::string TDS(TopDeviceStr);
+          if (TDS.empty() || !isDigits(TDS)) {
+            FAILURE_MESSAGE("error parsing device number: %.*s",
+                            Len(TopDeviceStr), TopDeviceStr.data());
+          } else {
+            RootD[0] = std::stoi(TDS);
+            DeviceNum = true;
+          }
+        }
+        if (DeviceSubTuple.size() >= 2) {
+          if (!DeviceNum && !HasDeviceWildCard)
+            FAILURE_MESSAGE("sub-devices can only be requested when parent "
+                            "device is specified by number or wildcard, not a "
+                            "device type like \'gpu\'");
+          const std::string_view SubDeviceStr = DeviceSubTuple[1];
+          if (!SubDeviceStr.empty() && SubDeviceStr.front() == '*') {
+            HasSubDeviceWildCard = true;
+            RootD[1] = -2;
+          } else {
+            if (HasDeviceWildCard) // subdevice is a number and device is a *
+              FAILURE_MESSAGE(
+                  "sub-device can't be requested by number if parent "
+                  "device is specified by a wildcard.");
+
+            std::string SDS(SubDeviceStr);
+            if (SDS.empty() || !isDigits(SDS)) {
+              FAILURE_MESSAGE("error parsing subdevice index: %.*s",
+                              Len(SubDeviceStr), SubDeviceStr.data());
+            } else
+              RootD[1] = std::stoi(SDS);
+          }
+        }
+        if (DeviceSubTuple.size() == 3) {
+          const std::string_view SubSubDeviceStr = DeviceSubTuple[2];
+          if (!SubSubDeviceStr.empty() && SubSubDeviceStr.front() == '*') {
+            RootD[2] = -2;
+          } else {
+            if (HasSubDeviceWildCard)
+              FAILURE_MESSAGE("sub-sub-device can't be requested by number if "
+                              "sub-device before is specified by a wildcard.");
+            std::string SSDS(SubSubDeviceStr);
+            if (SSDS.empty() || !isDigits(SSDS)) {
+              FAILURE_MESSAGE("error parsing sub-sub-device index: %.*s",
+                              Len(SubSubDeviceStr), SubSubDeviceStr.data());
+            } else
+              RootD[2] = std::stoi(SSDS);
+          }
+        } else if (DeviceSubTuple.size() > 3) {
+          FAILURE_MESSAGE("error parsing %.*s Only two levels of sub-devices "
+                          "supported at this time ",
+                          Len(TargetStr), TargetStr.data());
+        }
+
+        // Discarding entries go to the front so that they are seen before any
+        // accepting entry when the list is scanned in order.
+        const std::tuple<bool, int32_t, int32_t, int32_t> Entry(
+            !isDiscard, RootD[0], RootD[1], RootD[2]);
+        if (isDiscard)
+          ExplicitRootDevices.insert(ExplicitRootDevices.begin(), Entry);
+        else
+          ExplicitRootDevices.push_back(Entry);
+      }
+    }
+  }
+
+  DP("ONEAPI_DEVICE_SELECTOR specified %zu root devices\n",
+     ExplicitRootDevices.size());
+  DP(" (Accept/Discard [T/F] DeviceID[.SubID[.CCSID]]) -2(all), "
+     "-1(ignore)\n");
+  for (auto &T : ExplicitRootDevices) {
+    DP(" %c %d.%d.%d\n", (std::get<0>(T) == true) ? 'T' : 'F', std::get<1>(T),
+       std::get<2>(T), std::get<3>(T));
+    (void)T; // silence warning
+  }
+
+  // Memory pool
+  // LIBOMPTARGET_LEVEL_ZERO_MEMORY_POOL=<Option>
+  //  <Option>       := 0 | <PoolInfoList>
+  //  <PoolInfoList> := <PoolInfo>[,<PoolInfoList>]
+  //  <PoolInfo>     := <MemType>[,<AllocMax>[,<Capacity>[,<PoolSize>]]]
+  //  <MemType>      := all | device | host | shared
+  //  <AllocMax>     := non-negative integer or empty, max allocation size in
+  //                    MB (default: 1)
+  //  <Capacity>     := positive integer or empty, number of allocations from
+  //                    a single block (default: 4)
+  //  <PoolSize>     := positive integer or empty, max pool size in MB
+  //                    (default: 256)
+  const StringEnvar MemoryPoolVar("LIBOMPTARGET_LEVEL_ZERO_MEMORY_POOL", "");
+  if (MemoryPoolVar.isPresent()) {
+    if (MemoryPoolVar.get() == "0") {
+      Flags.UseMemoryPool = 0;
+      MemPoolInfo.clear();
+    } else {
+      std::istringstream Str(MemoryPoolVar.get());
+      int32_t MemType = -1; // no <MemType> seen yet
+      int32_t Offset = 0;   // index of the next numeric field of <PoolInfo>
+      int32_t Valid = 1;    // 0: invalid input, 1: valid, 2: valid with "all"
+      const std::array<int32_t, 3> DefaultValue{1, 4, 256};
+      const int32_t AllMemType = INT32_MAX;
+      std::array<int32_t, 3> AllInfo{1, 4, 256};
+      std::map<int32_t, std::array<int32_t, 3>> PoolInfo;
+      for (std::string Token; std::getline(Str, Token, ',') && Valid > 0;) {
+        if (Token == "device") {
+          MemType = TARGET_ALLOC_DEVICE;
+          PoolInfo.emplace(MemType, DefaultValue);
+          Offset = 0;
+        } else if (Token == "host") {
+          MemType = TARGET_ALLOC_HOST;
+          PoolInfo.emplace(MemType, DefaultValue);
+          Offset = 0;
+        } else if (Token == "shared") {
+          MemType = TARGET_ALLOC_SHARED;
+          PoolInfo.emplace(MemType, DefaultValue);
+          Offset = 0;
+        } else if (Token == "all") {
+          MemType = AllMemType;
+          Offset = 0;
+          Valid = 2;
+        } else if (Offset < 3 && MemType >= 0) {
+          if (Token.empty()) {
+            // Empty field: keep the default for this position. This has to
+            // be checked before parsing, because atoi("") yields 0 and would
+            // otherwise be taken as an explicit <AllocMax> of 0.
+            Offset++;
+          } else {
+            const int32_t Num = std::atoi(Token.c_str());
+            // <AllocMax> may be 0; the remaining fields must be positive.
+            const bool ValidNum = (Offset == 0) ? (Num >= 0) : (Num > 0);
+            if (!ValidNum)
+              Valid = 0;
+            else if (MemType == AllMemType)
+              AllInfo[Offset++] = Num;
+            else
+              PoolInfo[MemType][Offset++] = Num;
+          }
+        } else {
+          Valid = 0;
+        }
+      }
+      if (Valid > 0) {
+        if (Valid == 2) {
+          // "all" is specified -- ignore other inputs
+          if (AllInfo[0] > 0) {
+            MemPoolInfo[TARGET_ALLOC_DEVICE] = AllInfo;
+            MemPoolInfo[TARGET_ALLOC_HOST] = AllInfo;
+            MemPoolInfo[TARGET_ALLOC_SHARED] = AllInfo;
+          } else {
+            MemPoolInfo.clear();
+          }
+        } else {
+          // Use user-specified configuration
+          for (auto &I : PoolInfo) {
+            if (I.second[0] > 0)
+              MemPoolInfo[I.first] = I.second;
+            else
+              MemPoolInfo.erase(I.first);
+          }
+        }
+      } else {
+        DP("Ignoring incorrect memory pool configuration "
+           "LIBOMPTARGET_LEVEL_ZERO_MEMORY_POOL=%s\n",
+           MemoryPoolVar.get().c_str());
+        DP("LIBOMPTARGET_LEVEL_ZERO_MEMORY_POOL=<Option>\n");
+        DP(" <Option> := 0 | <PoolInfoList>\n");
+        DP(" <PoolInfoList> := <PoolInfo>[,<PoolInfoList>]\n");
+        DP(" <PoolInfo> := "
+           "<MemType>[,<AllocMax>[,<Capacity>[,<PoolSize>]]]\n");
+        DP(" <MemType> := all | device | host | shared\n");
+        DP(" <AllocMax> := non-negative integer or empty, "
+           "max allocation size in MB (default: 1)\n");
+        DP(" <Capacity> := positive integer or empty, "
+           "number of allocations from a single block (default: 4)\n");
+        DP(" <PoolSize> := positive integer or empty, "
+           "max pool size in MB (default: 256)\n");
+      }
+    }
+  }
+
+  if (StringEnvar("INTEL_ENABLE_OFFLOAD_ANNOTATIONS").isPresent()) {
+    // To match SYCL RT behavior, we just need to check whether
+    // INTEL_ENABLE_OFFLOAD_ANNOTATIONS is set. The actual value
+    // does not matter.
+    CommonSpecConstants.addConstant<char>(0xFF747469, 1);
+  }
+
+  // LIBOMPTARGET_LEVEL_ZERO_STAGING_BUFFER_SIZE=<SizeInKB>
+  const Envar<size_t> StagingBufferSizeVar(
+      "LIBOMPTARGET_LEVEL_ZERO_STAGING_BUFFER_SIZE");
+  if (StagingBufferSizeVar.isPresent()) {
+    size_t SizeInKB = StagingBufferSizeVar;
+    // The requested size is capped at 16 MB (expressed here in KB).
+    if (SizeInKB > (16 << 10)) {
+      SizeInKB = (16 << 10);
+      DP("Staging buffer size is capped at %zu KB\n", SizeInKB);
+    }
+    // Stored in bytes.
+    StagingBufferSize = SizeInKB << 10;
+  }
+
+  // LIBOMPTARGET_LEVEL_ZERO_COMMAND_MODE=<Fmt>
+  // <Fmt> := sync | async | async_ordered
+  // sync: perform synchronization after each command
+  // async: perform synchronization when it is required
+  // async_ordered: same as "async", but command is ordered
+  // This option is ignored unless IMM is fully enabled on compute and copy.
+  // On Intel PVC GPU, when used with immediate command lists over Level Zero
+  // backend, a target region may involve multiple command submissions to the
+  // L0 copy queue and compute queue. L0 events are used for each submission
+  // (data transfer of a single item or kernel execution). When "async" is
+  // specified, a) each data transfer to device is submitted with an event.
+  // b) The kernel is submitted next with a dependence on all the previous
+  // data transfer events. The kernel also has an event associated with it.
+  // c) The data transfer from device will be submitted with a dependence on
+  // the kernel event. d) Finally wait on the host for all the events
+  // associated with the data transfer from device.
+  // The env-var also affects any "target update" constructs as well.
+  // The env-var only affects the L0 copy/compute commands issued from a
+  // single target construct execution, not across multiple invocations.
+  const StringEnvar CommandModeVar("LIBOMPTARGET_LEVEL_ZERO_COMMAND_MODE");
+  if (CommandModeVar.isPresent()) {
+    if (match(CommandModeVar, "sync"))
+      CommandMode = CommandModeTy::Sync;
+    else if (match(CommandModeVar, "async"))
+      CommandMode = CommandModeTy::Async;
+    else if (match(CommandModeVar, "async_ordered"))
+      CommandMode = CommandModeTy::AsyncOrdered;
+    else
+      INVALID_OPTION(LIBOMPTARGET_LEVEL_ZERO_COMMAND_MODE,
+                     CommandModeVar.get().c_str());
+  }
+}
+/// Parse String and split into tokens of string_views based on the
+/// Delim character.
+std::vector<std::string_view>
+L0OptionsTy::tokenize(const std::string_view &Filter, const std::string &Delim,
----------------
AlexeySachkov wrote:
I would suggest reusing the existing `StringRef::split` and validating empty sub-strings at the call site, or adding a thin wrapper around `StringRef::split`; there is no need to re-implement it.
https://github.com/llvm/llvm-project/pull/158900
More information about the llvm-commits
mailing list