[Mlir-commits] [mlir] [mlir][ExecutionEngine] Add LevelZeroRuntimeWrapper. (PR #151038)
Petr Kurapov
llvmlistbot at llvm.org
Wed Jul 30 03:38:23 PDT 2025
================
@@ -0,0 +1,491 @@
+//===- LevelZeroRuntimeWrappers.cpp - MLIR Level Zero (L0) wrapper library-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements wrappers around the Level Zero (L0) runtime library with C linkage
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+
+#include "level_zero/ze_api.h"
+#include <cassert>
+#include <deque>
+#include <exception>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <unordered_set>
+#include <vector>
+
+namespace {
+
+// Runs `func` and hands back whatever it returns. If an exception escapes
+// `func`, a diagnostic is written to std::cerr and the process is terminated
+// with std::abort() instead of letting the exception propagate further.
+template <typename F>
+auto catchAll(F &&func) {
+  try {
+    return func();
+  } catch (const std::exception &err) {
+    // Standard exceptions carry a description worth reporting.
+    std::cerr << "An exception was thrown: " << err.what() << std::endl;
+  } catch (...) {
+    // Anything else gives us no details to print.
+    std::cerr << "An unknown exception was thrown." << std::endl;
+  }
+  // Only reached from one of the handlers above.
+  std::abort();
+}
+
+#define L0_SAFE_CALL(call) \
+ { \
+ ze_result_t status = (call); \
+ if (status != ZE_RESULT_SUCCESS) { \
+ std::cerr << "L0 error " << status << std::endl; \
+ std::abort(); \
+ } \
+ }
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// L0 RT context & device setters
+//===----------------------------------------------------------------------===//
+
+// Returns the L0 driver handle for the given index. Default index is 0
+// (i.e., returns the first driver handle of the available drivers).
+//
+// The GPU driver list is queried through zeInitDrivers on the first call made
+// by each thread and kept in a thread_local vector afterwards. Throws
+// std::runtime_error if no driver is found or if `idx` is not smaller than the
+// number of drivers; a failing zeInitDrivers call aborts via L0_SAFE_CALL.
+static ze_driver_handle_t getDriver(uint32_t idx = 0) {
+  thread_local static std::vector<ze_driver_handle_t> drivers;
+  thread_local static bool isDriverInitialised{false};
+
+  if (!isDriverInitialised) {
+    ze_init_driver_type_desc_t driver_type = {};
+    driver_type.stype = ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC;
+    driver_type.flags = ZE_INIT_DRIVER_TYPE_FLAG_GPU;
+    driver_type.pNext = nullptr;
+    uint32_t driverCount{0};
+    // First query only retrieves the number of available drivers.
+    L0_SAFE_CALL(zeInitDrivers(&driverCount, nullptr, &driver_type));
+    if (!driverCount)
+      throw std::runtime_error("No L0 drivers found.");
+    drivers.resize(driverCount);
+    // Second query fills in the handles.
+    L0_SAFE_CALL(zeInitDrivers(&driverCount, drivers.data(), &driver_type));
+    // Keep the vector in sync with the count reported by the second query.
+    drivers.resize(driverCount);
+    isDriverInitialised = true;
+  }
+
+  // Validate on every call, not only during initialisation: the cached path
+  // must not index past the end of the driver list either.
+  if (idx >= drivers.size())
+    throw std::runtime_error((llvm::Twine("Requested driver idx out-of-bound, "
+                                          "number of available drivers: ") +
+                              std::to_string(drivers.size()))
+                                 .str());
+  return drivers[idx];
+}
+
+// Returns the device handle at index `devIdx` of the driver at index
+// `driverIdx`. The most recently resolved handle is cached per thread and
+// reused for as long as the same (driverIdx, devIdx) pair is requested.
+// Throws std::runtime_error if `devIdx` is negative, if the driver reports no
+// devices, or if `devIdx` is not smaller than the reported device count.
+static ze_device_handle_t getDefaultDevice(const uint32_t driverIdx = 0,
+                                           const int32_t devIdx = 0) {
+  // Negative indices must never reach the cache comparison (the initial
+  // sentinel is -1) nor the vector subscript below.
+  if (devIdx < 0)
+    throw std::runtime_error("getDefaultDevice failed: devIdx out-of-bounds.");
+  thread_local static ze_device_handle_t l0Device;
+  thread_local static uint32_t currDriverIdx{0};
+  thread_local static int32_t currDevIdx{-1};
+  // The cache is only valid for the exact driver/device pair it was built for.
+  if (devIdx == currDevIdx && driverIdx == currDriverIdx)
+    return l0Device;
+  auto driver = getDriver(driverIdx);
+  uint32_t deviceCount{0};
+  // First query only retrieves the number of devices.
+  L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, nullptr));
+  if (!deviceCount)
+    throw std::runtime_error(
+        "getDefaultDevice failed: did not find L0 device.");
+  if (static_cast<uint32_t>(devIdx) >= deviceCount)
+    throw std::runtime_error("getDefaultDevice failed: devIdx out-of-bounds.");
+  std::vector<ze_device_handle_t> devices(deviceCount);
+  L0_SAFE_CALL(zeDeviceGet(driver, &deviceCount, devices.data()));
+  l0Device = devices[devIdx];
+  currDriverIdx = driverIdx;
+  currDevIdx = devIdx;
+  return l0Device;
+}
+
+// Returns the default L0 context of the default driver. The context is created
+// on the first call made by a thread and the same handle is returned on every
+// later call from that thread.
+static ze_context_handle_t getDefaultContext() {
+  thread_local static ze_context_handle_t cachedContext;
+  thread_local static bool contextReady{false};
+  if (!contextReady) {
+    ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
+    L0_SAFE_CALL(zeContextCreate(getDriver(), &desc, &cachedContext));
+    contextReady = true;
+  }
+  return cachedContext;
+}
+
+//===----------------------------------------------------------------------===//
+// L0 RT helper structs
+//===----------------------------------------------------------------------===//
+
+struct L0RtContext {
+ ze_driver_handle_t driver{nullptr};
+ ze_device_handle_t device{nullptr};
+ ze_context_handle_t context{nullptr};
+ // Usually, one immediate command list with ordinal 0 suffices for
+ // both copy and compute ops, but leaves HW underutilized.
+ ze_command_list_handle_t immCmdListCompute{nullptr};
+ // Copy engines can be used for both memcpy and memset, but
+ // they have limitations for memset pattern size (e.g., 1 byte).
+ ze_command_list_handle_t immCmdListCopy{nullptr};
+ uint32_t copyEngineMaxMemoryFillPatternSize{-1u};
+
+ L0RtContext(const int32_t devIdx = 0)
+ : driver(getDriver()), device(getDefaultDevice(devIdx)),
+ context(getDefaultContext()) {
+ uint32_t computeEngineOrdinal = -1u, copyEngineOrdinal = -1u;
+ ze_device_properties_t deviceProperties = {};
+ L0_SAFE_CALL(zeDeviceGetProperties(device, &deviceProperties));
+ uint32_t queueGroupCount = 0;
+ L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
+ device, &queueGroupCount, nullptr));
+ std::vector<ze_command_queue_group_properties_t> queueGroupProperties(
+ queueGroupCount);
+ L0_SAFE_CALL(zeDeviceGetCommandQueueGroupProperties(
+ device, &queueGroupCount, queueGroupProperties.data()));
+ for (uint32_t queueGroupIdx = 0; queueGroupIdx < queueGroupCount;
+ ++queueGroupIdx) {
+ const auto &group = queueGroupProperties[queueGroupIdx];
+ if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)
+ computeEngineOrdinal = queueGroupIdx;
+ else if (group.flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) {
+ copyEngineOrdinal = queueGroupIdx;
+ copyEngineMaxMemoryFillPatternSize = group.maxMemoryFillPatternSize;
+ }
+ if (copyEngineOrdinal != -1u && computeEngineOrdinal != -1u)
+ break;
+ }
+ // Fallback to the default queue if no dedicated copy queue is available.
+ if (copyEngineOrdinal == -1u)
+ copyEngineOrdinal = computeEngineOrdinal;
+ assert(copyEngineOrdinal != -1u && computeEngineOrdinal != -1u &&
+ "Expected two engines to be available.");
+ ze_command_queue_desc_t cmdQueueDesc{
+ ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
+ nullptr,
+ copyEngineOrdinal, // ordinal
+ 0, // index (assume one physical engine in the group)
+ 0, // flags
+ ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
+ ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
+ L0_SAFE_CALL(zeCommandListCreateImmediate(context, device, &cmdQueueDesc,
+ &immCmdListCopy));
+ cmdQueueDesc.ordinal = computeEngineOrdinal;
+ L0_SAFE_CALL(zeCommandListCreateImmediate(context, device, &cmdQueueDesc,
+ &immCmdListCompute));
+ }
+ void cleanup() {
+ L0_SAFE_CALL(zeCommandListDestroy(immCmdListCopy));
----------------
kurapov-peter wrote:
+1, and if you leave it as is, there's potential for inconsistent states (e.g., the user calls `cleanup`, then `getDefaultContext`, and gets an invalid context handle).
https://github.com/llvm/llvm-project/pull/151038
More information about the Mlir-commits
mailing list