[llvm] [OFFLOAD] Add plugin with support for Intel oneAPI Level Zero (PR #158900)
Alexey Sachkov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 06:23:39 PDT 2025
================
@@ -0,0 +1,625 @@
+//===--- Level Zero Target RTL Implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Level Zero Program abstraction
+//
+//===----------------------------------------------------------------------===//
+
+#include <fstream>
+#ifdef _WIN32
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <dlfcn.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif // !_WIN32
+
+#include "L0Plugin.h"
+#include "L0Program.h"
+
+namespace llvm::omp::target::plugin {
+
+/// Look up the device address of \p DeviceGlobal in the program associated
+/// with \p Image on \p Device and store it on the global.
+///
+/// The pointer on \p DeviceGlobal is updated even when the lookup yields
+/// nullptr; in that case an error naming the global is returned.
+Error L0GlobalHandlerTy::getGlobalMetadataFromDevice(GenericDeviceTy &Device,
+                                                     DeviceImageTy &Image,
+                                                     GlobalTy &DeviceGlobal) {
+  auto &LevelZeroDev = static_cast<L0DeviceTy &>(Device);
+  const L0ProgramTy *OwningProgram =
+      LevelZeroDev.getProgramFromImage(Image.getTgtImage());
+
+  const char *SymbolName = DeviceGlobal.getName().data();
+  void *DeviceAddr = OwningProgram->getOffloadVarDeviceAddr(SymbolName);
+
+  // Record the lookup result unconditionally, nullptr included.
+  DeviceGlobal.setPtr(DeviceAddr);
+
+  if (DeviceAddr != nullptr)
+    return Plugin::success();
+
+  return Plugin::error(ErrorCode::UNKNOWN, "Failed to load global '%s'",
+                       SymbolName);
+}
+
+/// Return the device reported by getDevice(), converted to the Level Zero
+/// device type through L0DeviceTy::makeL0Device().
+inline L0DeviceTy &L0ProgramTy::getL0Device() const {
+ return L0DeviceTy::makeL0Device(getDevice());
+}
+
+/// Tear down the program: the kernel objects go first, the Level Zero module
+/// handles second.
+L0ProgramTy::~L0ProgramTy() {
+  for (auto *OwnedKernel : Kernels) {
+    // Kernels are created by `GenericDeviceTy::constructKernel()`, so they are
+    // released by hand: run the destructor explicitly, then hand the storage
+    // back to the plugin.
+    OwnedKernel->~L0KernelTy();
+    getL0Device().getPlugin().free(OwnedKernel);
+  }
+
+  // Destroy every module handle this program created.
+  for (auto Module : Modules) {
+    CALL_ZE_RET_VOID(zeModuleDestroy, Module);
+  }
+}
+
+/// Decide whether the current image has to be built as a library module and
+/// set `IsLibModule` accordingly.
+///
+/// The image is treated as a library module when none of its offload entries
+/// looks like a kernel (a named entry of size 0) and the file that contains
+/// the image, as reported by dladdr(), is a 64-bit little-endian ELF of type
+/// ET_DYN. `IsLibModule` is only ever set to true here. On Windows this
+/// function does nothing.
+void L0ProgramTy::setLibModule() {
+#ifdef _WIN32
+  return;
+#else
+  const auto *Image = getTgtImage();
+  const size_t NumEntries =
+      static_cast<size_t>(Image->EntriesEnd - Image->EntriesBegin);
+  for (size_t I = 0; I < NumEntries; I++) {
+    const auto &Entry = Image->EntriesBegin[I];
+    // Image contains a kernel, so it is not compiled as a library module.
+    if (Entry.SymbolName && Entry.Size == 0)
+      return;
+  }
+
+  // Check if the image belongs to a dynamic library.
+  Dl_info DLI{nullptr};
+  if (!dladdr(Image->ImageStart, &DLI) || !DLI.dli_fname)
+    return;
+
+  std::vector<uint8_t> FileBin;
+  const auto Size = readFile(DLI.dli_fname, FileBin);
+  if (!Size)
+    return;
+
+  auto MB = MemoryBuffer::getMemBuffer(
+      StringRef(reinterpret_cast<const char *>(FileBin.data()), Size),
+      /*BufferName=*/"", /*RequiresNullTerminator=*/false);
+  auto ELF = ELFObjectFileBase::createELFObjectFile(MB->getMemBufferRef());
+  if (!ELF) {
+    // The containing file is not a readable ELF object. A failed Expected has
+    // to have its error consumed before it is destroyed.
+    consumeError(ELF.takeError());
+    return;
+  }
+
+  const auto *Obj = dyn_cast<ELF64LEObjectFile>((*ELF).get());
+  if (!Obj)
+    return;
+
+  const auto &Header = Obj->getELFFile().getHeader();
+  if (Header.e_type == ELF::ET_DYN) {
+    DP("Processing current image as library\n");
+    IsLibModule = true;
+  }
+#endif // _WIN32
+}
+
+/// Build one Level Zero module from a single image and register it with this
+/// program and with the device.
+///
+/// \param Size number of bytes in \p Image.
+/// \param Image start of the module input.
+/// \param CommonBuildOptions build flags; " -library-compilation " is
+///        appended when `IsLibModule` is set.
+/// \param Format module format passed through to zeModuleCreate.
+/// \returns OFFLOAD_SUCCESS when the module was built and registered, and also
+///          when the build failed for a library module; OFFLOAD_FAIL
+///          otherwise.
+int32_t L0ProgramTy::addModule(size_t Size, const uint8_t *Image,
+                               const std::string &CommonBuildOptions,
+                               ze_module_format_t Format) {
+  const ze_module_constants_t SpecConstants =
+      LevelZeroPluginTy::getOptions().CommonSpecConstants.getModuleConstants();
+  auto &l0Device = getL0Device();
+  std::string BuildOptions(CommonBuildOptions);
+
+  // Add required flag to enable dynamic linking.
+  if (IsLibModule)
+    BuildOptions += " -library-compilation ";
+
+  // Build a single module from a single image.
+  ze_module_desc_t ModuleDesc{};
+  ModuleDesc.stype = ZE_STRUCTURE_TYPE_MODULE_DESC;
+  ModuleDesc.pNext = nullptr;
+  ModuleDesc.format = Format;
+  ModuleDesc.inputSize = Size;
+  ModuleDesc.pInputModule = Image;
+  ModuleDesc.pBuildFlags = BuildOptions.c_str();
+  ModuleDesc.pConstants = &SpecConstants;
+
+  ze_module_handle_t Module = nullptr;
+  ze_result_t RC;
+  // The build log is an optional output. Nothing here reads it, so it is not
+  // requested at all; a requested log would have to be destroyed with
+  // zeModuleBuildLogDestroy.
+  CALL_ZE_RC(RC, zeModuleCreate, l0Device.getZeContext(),
+             l0Device.getZeDevice(), &ModuleDesc, &Module, nullptr);
+
+  if (RC != ZE_RESULT_SUCCESS) {
+    // NOTE(review): a failed build of a library module is reported as
+    // success -- presumably a deliberate best effort; confirm.
+    return IsLibModule ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
+  }
+
+  // Check if module link is required. We do not need this check for
+  // library module.
+  if (!RequiresModuleLink && !IsLibModule) {
+    ze_module_properties_t Properties = {ZE_STRUCTURE_TYPE_MODULE_PROPERTIES,
+                                         nullptr, 0};
+    CALL_ZE_RC(RC, zeModuleGetProperties, Module, &Properties);
+    if (RC != ZE_RESULT_SUCCESS) {
+      // The module is not tracked in `Modules` yet, so it has to be destroyed
+      // here or it would leak.
+      CALL_ZE_RET_FAIL(zeModuleDestroy, Module);
+      return OFFLOAD_FAIL;
+    }
+    RequiresModuleLink = Properties.flags & ZE_MODULE_PROPERTY_FLAG_IMPORTS;
+  }
+
+  // For now, assume the first module contains libraries, globals.
+  if (Modules.empty())
+    GlobalModule = Module;
+  Modules.push_back(Module);
+  l0Device.addGlobalModule(Module);
+  return OFFLOAD_SUCCESS;
+}
+
+/// Dynamically link the device's global modules when any module of this
+/// program was found to require linking.
+///
+/// \returns OFFLOAD_SUCCESS when no link is required or the link succeeded;
+///          OFFLOAD_FAIL when this program has no modules or the link failed.
+int32_t L0ProgramTy::linkModules() {
+  if (!RequiresModuleLink) {
+    DP("Module link is not required\n");
+    return OFFLOAD_SUCCESS;
+  }
+
+  if (Modules.empty()) {
+    DP("Invalid number of modules when linking modules\n");
+    return OFFLOAD_FAIL;
+  }
+
+  auto &l0Device = getL0Device();
+  ze_result_t RC;
+  // The link log is an optional output. Nothing here reads it, so it is not
+  // requested at all; a requested log would have to be destroyed with
+  // zeModuleBuildLogDestroy.
+  CALL_ZE_RC(RC, zeModuleDynamicLink,
+             static_cast<uint32_t>(l0Device.getNumGlobalModules()),
+             l0Device.getGlobalModulesArray(), nullptr);
+  return RC == ZE_RESULT_SUCCESS ? OFFLOAD_SUCCESS : OFFLOAD_FAIL;
+}
+
+/// Read the whole file \p FileName into \p OutFile in binary mode.
+///
+/// \returns the number of bytes read, or 0 when the file cannot be opened,
+///          its size cannot be determined, it is empty, or the read fails.
+///          \p OutFile is left untouched when the file cannot be opened and is
+///          empty in the other failure cases.
+size_t L0ProgramTy::readFile(const char *FileName,
+                             std::vector<uint8_t> &OutFile) const {
+  std::ifstream IFS(FileName, std::ios::binary);
+  if (!IFS.good())
+    return 0;
+
+  IFS.seekg(0, IFS.end);
+  const std::streamoff EndPos = IFS.tellg();
+  // tellg() reports failure as -1; converting that to size_t would request an
+  // enormous allocation below.
+  if (EndPos <= 0) {
+    OutFile.clear();
+    return 0;
+  }
+
+  const auto FileSize = static_cast<size_t>(EndPos);
+  OutFile.resize(FileSize);
+  IFS.seekg(0);
+  if (!IFS.read(reinterpret_cast<char *>(OutFile.data()), FileSize)) {
+    OutFile.clear();
+    return 0;
+  }
+  return FileSize;
+}
+
+/// Read the SPIR-V file \p FileName from the directory of the module that
+/// defines `__tgt_target_data_begin_nowait`.
+///
+/// \param FileName file name, resolved relative to that module's directory.
+/// \param OutSPV receives the file contents.
+/// \returns OFFLOAD_SUCCESS when a non-empty file was read; OFFLOAD_FAIL when
+///          the module path cannot be resolved or the file cannot be read.
+int32_t L0ProgramTy::readSPVFile(const char *FileName,
+                                 std::vector<uint8_t> &OutSPV) const {
+  // Resolve full path using the location of the plugin
+  std::string FullPath;
+#ifdef _WIN32
+  char RTLPath[_MAX_PATH];
+  HMODULE RTLModule = nullptr;
+  if (!GetModuleHandleExA(
+          GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+              GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+          reinterpret_cast<LPCSTR>(&__tgt_target_data_begin_nowait),
+          &RTLModule)) {
+    DP("Error: module creation failed -- cannot resolve full path\n");
+    return OFFLOAD_FAIL;
+  }
+  // A return value equal to the buffer size means the path was truncated.
+  const DWORD PathLen = GetModuleFileNameA(RTLModule, RTLPath, sizeof(RTLPath));
+  if (PathLen == 0 || PathLen >= sizeof(RTLPath)) {
+    DP("Error: module creation failed -- cannot resolve full path\n");
+    return OFFLOAD_FAIL;
+  }
+  FullPath = RTLPath;
+#else  // _WIN32
+  Dl_info RTLInfo{};
+  // dli_fname is checked as well: assigning a null pointer to a std::string is
+  // undefined behavior.
+  if (!dladdr(reinterpret_cast<void *>(&__tgt_target_data_begin_nowait),
+              &RTLInfo) ||
+      !RTLInfo.dli_fname) {
+    DP("Error: module creation failed -- cannot resolve full path\n");
+    return OFFLOAD_FAIL;
+  }
+  FullPath = RTLInfo.dli_fname;
+#endif // _WIN32
+  // Replace the module's file name with FileName, keeping the directory part.
+  const size_t PathSep = FullPath.find_last_of("/\\");
+  if (PathSep == std::string::npos)
+    FullPath = FileName;
+  else
+    FullPath.replace(PathSep + 1, std::string::npos, FileName);
+
+  // Read from the full path
+  if (!readFile(FullPath.c_str(), OutSPV)) {
+    DP("Error: module creation failed -- cannot read %s\n", FullPath.c_str());
+    return OFFLOAD_FAIL;
+  }
+  return OFFLOAD_SUCCESS;
+}
+
+/// Rewrite driver-level options in \p Options into their backend spellings,
+/// in place.
+///
+/// Every occurrence of each option in the translation table is replaced, so
+/// a repeated driver option is not passed on untranslated. \p Device is
+/// currently not consulted.
+void L0ProgramTy::replaceDriverOptsWithBackendOpts(const L0DeviceTy &Device,
+                                                   std::string &Options) const {
+  // Options that need to be replaced with backend-specific options
+  static const struct {
+    std::string Option;
+    std::string BackendOption;
+  } OptionTranslationTable[] = {
+      {"-ftarget-compile-fast",
+       "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"},
+      {"-foffload-fp32-prec-div", "-ze-fp32-correctly-rounded-divide-sqrt"},
+      {"-foffload-fp32-prec-sqrt", "-ze-fp32-correctly-rounded-divide-sqrt"},
+  };
+
+  for (const auto &OptPair : OptionTranslationTable) {
+    size_t Pos = Options.find(OptPair.Option);
+    while (Pos != std::string::npos) {
+      Options.replace(Pos, OptPair.Option.length(), OptPair.BackendOption);
+      // Resume after the inserted text so the replacement is never rescanned.
+      Pos = Options.find(OptPair.Option, Pos + OptPair.BackendOption.length());
+    }
+  }
+}
+
+// FIXME: move this to llvm/BinaryFormat/ELF.h and elf.h:
+// Note types carried by ELF notes whose name is "INTELONEOMPOFFLOAD".
+// Version note: the descriptor is a "Major#.Minor#" string.
+#define NT_INTEL_ONEOMP_OFFLOAD_VERSION 1
+// Image-count note: the descriptor is parsed as an unsigned integer.
+#define NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT 2
+// Auxiliary image note: the descriptor is read as four NUL-separated strings
+// (image index, format, compile options, link options).
+#define NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX 3
+
+/// Check whether \p Image is an ELF file that carries oneAPI OpenMP offload
+/// notes of a supported version.
+///
+/// Notes named "INTELONEOMPOFFLOAD" are searched in every SHT_NOTE section.
+/// The first NT_INTEL_ONEOMP_OFFLOAD_VERSION note decides the result: its
+/// "Major#.Minor#" descriptor is parsed into \p MajorVer and \p MinorVer and
+/// only version 1.0 is accepted. When there is no version note, the image is
+/// accepted if at least one such note was seen.
+///
+/// \param Image raw bytes of the candidate image.
+/// \param MajorVer [out] written only when a well-formed version note is found.
+/// \param MinorVer [out] written only when a well-formed version note is found.
+/// \returns true when the image is acceptable; false for non-ELF input,
+///          unreadable sections or notes, or a malformed or unsupported
+///          version.
+bool isValidOneOmpImage(StringRef Image, uint64_t &MajorVer,
+                        uint64_t &MinorVer) {
+  const auto MB = MemoryBuffer::getMemBuffer(Image,
+                                             /*BufferName=*/"",
+                                             /*RequiresNullTerminator=*/false);
+  auto ExpectedNewE =
+      ELFObjectFileBase::createELFObjectFile(MB->getMemBufferRef());
+  if (!ExpectedNewE) {
+    DP("Warning: unable to get ELF handle!\n");
+    // A failed Expected has to have its error consumed before destruction.
+    consumeError(ExpectedNewE.takeError());
+    return false;
+  }
+
+  auto processObjF = [&](const auto *ELFObjF) {
+    if (!ELFObjF) {
+      DP("Warning: Unexpected ELF type!\n");
+      return false;
+    }
+    const auto &ELFF = ELFObjF->getELFFile();
+    auto Sections = ELFF.sections();
+    if (!Sections) {
+      DP("Warning: unable to get ELF sections!\n");
+      consumeError(Sections.takeError());
+      return false;
+    }
+
+    bool SeenOffloadSection = false;
+    for (const auto &Sec : *Sections) {
+      if (Sec.sh_type != ELF::SHT_NOTE)
+        continue;
+
+      bool FoundVersion = false;
+      bool VersionSupported = false;
+      Error Err = Plugin::success();
+      for (auto Note : ELFF.notes(Sec, Err)) {
+        if (Note.getName() != "INTELONEOMPOFFLOAD")
+          continue;
+        SeenOffloadSection = true;
+        if (Note.getType() != NT_INTEL_ONEOMP_OFFLOAD_VERSION)
+          continue;
+
+        // The first version note decides; leave the loop instead of returning
+        // so that Err is always inspected below.
+        FoundVersion = true;
+        const StringRef Desc = Note.getDescAsStringRef(4);
+        const size_t DelimPos = Desc.find('.');
+        StringRef MajorStr = Desc.substr(0, DelimPos);
+        StringRef MinorStr =
+            DelimPos == StringRef::npos ? StringRef() : Desc.substr(DelimPos + 1);
+        uint64_t Major = 0;
+        uint64_t Minor = 0;
+        // consumeInteger() returns true on failure and, unlike std::stoull,
+        // does not throw on a malformed descriptor.
+        if (DelimPos == StringRef::npos || MajorStr.consumeInteger(10, Major) ||
+            MinorStr.consumeInteger(10, Minor)) {
+          // The version has to look like "Major#.Minor#".
+          DP("Invalid NT_INTEL_ONEOMP_OFFLOAD_VERSION: '%s'\n",
+             Desc.str().c_str());
+          break;
+        }
+        MajorVer = Major;
+        MinorVer = Minor;
+        VersionSupported = (MajorVer == 1 && MinorVer == 0);
+        break;
+      }
+      // notes() iterates fallibly: the error is reported through Err once the
+      // iteration stops, so it has to be checked (and consumed) here.
+      if (Err) {
+        DP("Warning: unable to get ELF notes handle!\n");
+        consumeError(std::move(Err));
+        return false;
+      }
+      if (FoundVersion)
+        return VersionSupported;
+    }
+    return SeenOffloadSection;
+  };
+
+  bool Res = false;
+  if (const auto *O = dyn_cast<ELF64LEObjectFile>((*ExpectedNewE).get())) {
+    Res = processObjF(O);
+  } else if (const auto *O =
+                 dyn_cast<ELF32LEObjectFile>((*ExpectedNewE).get())) {
+    Res = processObjF(O);
+  } else {
+    assert(false && "Unexpected ELF format");
+  }
+  return Res;
+}
+
+/// View the bytes of \p Image, from `ImageStart` up to `ImageEnd`, as a
+/// StringRef.
+static StringRef getImageStringRef(const __tgt_device_image *Image) {
+  const char *const First = reinterpret_cast<char *>(Image->ImageStart);
+  const char *const Last = reinterpret_cast<char *>(Image->ImageEnd);
+  return StringRef(First, static_cast<size_t>(Last - First));
+}
+
+/// Overload for a device image descriptor: validates the bytes spanned by
+/// \p Image and passes \p MajorVer and \p MinorVer through to the StringRef
+/// overload.
+bool isValidOneOmpImage(const __tgt_device_image *Image, uint64_t &MajorVer,
+                        uint64_t &MinorVer) {
+  const StringRef ImageBytes = getImageStringRef(Image);
+  return isValidOneOmpImage(ImageBytes, MajorVer, MinorVer);
+}
+
+int32_t L0ProgramTy::buildModules(std::string &BuildOptions) {
+ auto &l0Device = getL0Device();
+ auto *Image = getTgtImage();
+ if (identify_magic(getImageStringRef(Image)) == file_magic::spirv_object) {
+ // Handle legacy plain SPIR-V image.
+ uint8_t *ImgBegin = reinterpret_cast<uint8_t *>(Image->ImageStart);
+ uint8_t *ImgEnd = reinterpret_cast<uint8_t *>(Image->ImageEnd);
+ size_t ImgSize = ImgEnd - ImgBegin;
+ return addModule(ImgSize, ImgBegin, BuildOptions,
+ ZE_MODULE_FORMAT_IL_SPIRV);
+ }
+
+ uint64_t MajorVer, MinorVer;
+ if (!isValidOneOmpImage(Image, MajorVer, MinorVer)) {
+ DP("Warning: image is not a valid oneAPI OpenMP image.\n");
+ return OFFLOAD_FAIL;
+ }
+
+ setLibModule();
+
+ // Iterate over the images and pick the first one that fits.
+ uint64_t ImageCount = 0;
+ struct V1ImageInfo {
+ // 0 - native, 1 - SPIR-V
+ uint64_t Format = (std::numeric_limits<uint64_t>::max)();
+ std::string CompileOpts;
+ std::string LinkOpts;
+ // We may have multiple sections created from split-kernel mode
+ std::vector<const uint8_t *> PartBegin;
+ std::vector<uint64_t> PartSize;
+
+ V1ImageInfo(uint64_t Format, std::string CompileOpts, std::string LinkOpts)
+ : Format(Format), CompileOpts(std::move(CompileOpts)),
+ LinkOpts(std::move(LinkOpts)) {}
+ };
+ std::unordered_map<uint64_t, V1ImageInfo> AuxInfo;
+
+ auto MB = MemoryBuffer::getMemBuffer(getImageStringRef(Image),
+ /*BufferName=*/"",
+ /*RequiresNullTerminator=*/false);
+ auto ExpectedNewE =
+ ELFObjectFileBase::createELFObjectFile(MB->getMemBufferRef());
+ assert(ExpectedNewE &&
+ "isValidOneOmpImage() returns true for invalid ELF image");
+ auto processELF = [&](auto *EObj) {
+ assert(EObj && "isValidOneOmpImage() returns true for invalid ELF image.");
+ assert(MajorVer == 1 && MinorVer == 0 &&
+ "FIXME: update image processing for new oneAPI OpenMP version.");
+ const auto &E = EObj->getELFFile();
+ // Collect auxiliary information.
+ uint64_t MaxImageIdx = 0;
+
+ auto Sections = E.sections();
+ assert(Sections && "isValidOneOmpImage() returns true for ELF image with "
+ "invalid sections.");
+
+ for (auto Sec : *Sections) {
+ if (Sec.sh_type != ELF::SHT_NOTE)
+ continue;
+ Error Err = Plugin::success();
+ for (auto Note : E.notes(Sec, Err)) {
+ assert(!Err && "isValidOneOmpImage() returns true for ELF image with "
+ "invalid notes.");
+ if (Note.getName().str() != "INTELONEOMPOFFLOAD")
+ continue;
+
+ const uint64_t Type = Note.getType();
+ std::string DescStr(std::move(Note.getDescAsStringRef(4)));
+ switch (Type) {
+ default:
+ DP("Warning: unrecognized INTELONEOMPOFFLOAD note.\n");
+ break;
+ case NT_INTEL_ONEOMP_OFFLOAD_VERSION:
+ break;
+ case NT_INTEL_ONEOMP_OFFLOAD_IMAGE_COUNT:
+ ImageCount = std::stoull(DescStr);
+ break;
+ case NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX: {
+ std::vector<std::string> Parts;
+ do {
+ const auto DelimPos = DescStr.find('\0');
+ if (DelimPos == std::string::npos) {
+ Parts.push_back(std::move(DescStr));
+ break;
+ }
+ Parts.push_back(DescStr.substr(0, DelimPos));
+ DescStr.erase(0, DelimPos + 1);
+ } while (Parts.size() < 4);
+
+ // Ignore records with less than 4 strings.
+ if (Parts.size() != 4) {
+ DP("Warning: short NT_INTEL_ONEOMP_OFFLOAD_IMAGE_AUX "
+ "record is ignored.\n");
+ continue;
+ }
+
+ const uint64_t Idx = std::stoull(Parts[0]);
+ MaxImageIdx = (std::max)(MaxImageIdx, Idx);
+ if (AuxInfo.find(Idx) != AuxInfo.end()) {
+ DP("Warning: duplicate auxiliary information for image %" PRIu64
+ " is ignored.\n",
+ Idx);
+ continue;
+ }
+ AuxInfo.emplace(
+ std::piecewise_construct, std::forward_as_tuple(Idx),
+ std::forward_as_tuple(std::stoull(Parts[1]), Parts[2], Parts[3]));
+ // Image pointer and size
+ // will be initialized later.
+ }
+ }
+ }
+ }
+
+ if (MaxImageIdx >= ImageCount)
+ DP("Warning: invalid image index found in auxiliary information.\n");
+
+ for (auto Sec : *Sections) {
+ const char *Prefix = "__openmp_offload_spirv_";
+ auto ExpectedSectionName = E.getSectionName(Sec);
+ assert(ExpectedSectionName && "isValidOneOmpImage() returns true for ELF "
+ "image with invalid section names");
+ std::string SectionName = (*ExpectedSectionName).str();
+ if (SectionName.find(Prefix) != 0)
+ continue;
+ SectionName.erase(0, std::strlen(Prefix));
----------------
AlexeySachkov wrote:
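Suggestion: use [`StringRef::consume_front`](https://llvm.org/doxygen/classllvm_1_1StringRef.html#a8a7fac667f8ae35285b8b53d9f2dd9dc) here. It checks for the prefix and strips it in one step, replacing the `find` + `erase` pair.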
https://github.com/llvm/llvm-project/pull/158900
More information about the llvm-commits
mailing list