[Mlir-commits] [mlir] [mlir][XeVM] Use libocloc API for binary generation. (PR #188353)
Md Abdullah Shahneous Bari
llvmlistbot at llvm.org
Thu Apr 9 10:59:50 PDT 2026
https://github.com/mshahneo updated https://github.com/llvm/llvm-project/pull/188353
>From 67e7eb61eb16d0416efd3c35a31eae10eb70baab Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Tue, 24 Mar 2026 21:04:35 +0000
Subject: [PATCH 1/5] [mlir][XeVM] Use libocloc API for binary generation.
This PR improves native binary generation by avoiding the
`llvm::sys::ExecuteAndWait` call for ocloc and instead
leveraging `oclocInvoke()`, which consumes an in-memory SPIR-V string.
---
mlir/cmake/modules/Findocloc.cmake | 82 ++++++++++
mlir/include/mlir/Target/LLVM/XeVM/Utils.h | 3 -
mlir/lib/Target/LLVM/CMakeLists.txt | 23 ++-
mlir/lib/Target/LLVM/XeVM/Target.cpp | 182 +++++++++------------
4 files changed, 178 insertions(+), 112 deletions(-)
create mode 100644 mlir/cmake/modules/Findocloc.cmake
diff --git a/mlir/cmake/modules/Findocloc.cmake b/mlir/cmake/modules/Findocloc.cmake
new file mode 100644
index 0000000000000..7209c145bd762
--- /dev/null
+++ b/mlir/cmake/modules/Findocloc.cmake
@@ -0,0 +1,82 @@
+# Findocloc.cmake
+
+# find_ocloc()
+#
+# Searches for the three pieces of an ocloc installation: the `ocloc`
+# executable, the ocloc library, and the `ocloc_api.h` header. Directories
+# derived from OCLOC_PACKAGE_DIR are searched first on their own
+# (NO_DEFAULT_PATH); a second, regular search runs only if the first one did
+# not already fill the cache entry.
+#
+# Results:
+#   OCLOC_EXECUTABLE   cache entry set by find_program
+#   OCLOC_LIBRARY      cache entry set by find_library
+#   OCLOC_INCLUDE_DIR  cache entry set by find_path
+#   OCLOC_FOUND        cache entry, TRUE only when all three were found;
+#                      refreshed on every configure
+#   ocloc, libocloc    imported targets, created only when OCLOC_FOUND
+function(find_ocloc)
+  message(STATUS "Searching for ocloc")
+
+  if(WIN32)
+    # On Windows the searched file names carry a 64/32 suffix selected from
+    # the pointer size, and the package root itself is searched too.
+    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+      set(OCLOC_SUFFIX "64")
+    else()
+      set(OCLOC_SUFFIX "32")
+    endif()
+
+    set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/bin")
+    set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/lib")
+    set(OCLOC_INC_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/include")
+  else()
+    set(OCLOC_SUFFIX "")
+
+    set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}/bin")
+    set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}/lib")
+    set(OCLOC_INC_PATHS "${OCLOC_PACKAGE_DIR}/include")
+
+    if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+      list(APPEND OCLOC_LIB_PATHS
+        "${OCLOC_PACKAGE_DIR}/lib64"
+        "${OCLOC_PACKAGE_DIR}/lib/x86_64-linux-gnu")
+    endif()
+  endif()
+
+  # Each find_* call is issued twice: first restricted to the package
+  # directories, then with the default search paths as a fallback. The
+  # results are stored in the cache by the find_* commands themselves.
+
+  # Search for ocloc executable
+  find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+    PATHS ${OCLOC_EXE_PATHS} NO_DEFAULT_PATH)
+  find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+    PATHS ${OCLOC_EXE_PATHS})
+
+  # Search for ocloc library
+  find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+    PATHS ${OCLOC_LIB_PATHS} NO_DEFAULT_PATH)
+  find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+    PATHS ${OCLOC_LIB_PATHS})
+
+  # Search for ocloc_api.h header file
+  find_path(OCLOC_INCLUDE_DIR NAMES ocloc_api.h
+    PATHS ${OCLOC_INC_PATHS} NO_DEFAULT_PATH)
+  find_path(OCLOC_INCLUDE_DIR NAMES ocloc_api.h
+    PATHS ${OCLOC_INC_PATHS})
+
+  # Check if all components are found
+  if(OCLOC_EXECUTABLE AND OCLOC_LIBRARY AND OCLOC_INCLUDE_DIR)
+    set(found TRUE)
+  else()
+    set(found FALSE)
+  endif()
+
+  # Provide the results to the user
+  if(found)
+    message(STATUS "Found ocloc executable: ${OCLOC_EXECUTABLE}")
+    message(STATUS "Found ocloc library: ${OCLOC_LIBRARY}")
+    message(STATUS "Found ocloc_api.h: ${OCLOC_INCLUDE_DIR}")
+  else()
+    message(STATUS "ocloc not found")
+  endif()
+
+  # OCLOC_FOUND is a computed result, not a user setting, so overwrite any
+  # value left in the cache by an earlier configure. Without FORCE a previous
+  # "not found" run would stick even after ocloc becomes available.
+  set(OCLOC_FOUND "${found}" CACHE BOOL "ocloc found" FORCE)
+
+  # Only publish imported targets that point at real files, and tolerate the
+  # module being loaded more than once from the same directory.
+  if(found)
+    if(NOT TARGET ocloc)
+      add_executable(ocloc IMPORTED)
+      set_property(TARGET ocloc PROPERTY IMPORTED_LOCATION "${OCLOC_EXECUTABLE}")
+    endif()
+
+    if(NOT TARGET libocloc)
+      add_library(libocloc SHARED IMPORTED)
+      set_target_properties(libocloc PROPERTIES
+        IMPORTED_LOCATION "${OCLOC_LIBRARY}"
+        INTERFACE_INCLUDE_DIRECTORIES "${OCLOC_INCLUDE_DIR}")
+    endif()
+  endif()
+endfunction()
+
+# Call the function to find ocloc
+find_ocloc()
+
diff --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 455c8303a9aa8..02e766ff71cf5 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -51,9 +51,6 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// a Resource blob pointing to the LLVM bitcode in-memory.
SmallVector<Attribute> librariesToLink;
- /// Returns the path to the tool used for serialization.
- std::optional<std::string> findTool(StringRef tool);
-
/// GPU compilation target options.
gpu::TargetOptions targetOptions;
};
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 94660e231888b..1d671027c6d34 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -83,7 +83,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
# Downstream projects can modify this path and use it in CMake. For example:
# add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
- # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
+ # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
# where `...` is to be replaced with the path to the library.
set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
# Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
@@ -101,7 +101,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
endif()
add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
- set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
+ set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
endif()
else()
@@ -141,7 +141,7 @@ if (MLIR_NVVM_EMBED_LIBDEVICE)
"Requested using the `nvptxcompiler` library backend but it couldn't be found.")
endif()
endif()
-
+
embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
add_mlir_library(MLIRNVVMLibdevice
${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
@@ -234,3 +234,20 @@ add_mlir_dialect_library(MLIRXeVMTarget
MLIRTargetLLVM
MLIRXeVMToLLVMIRTranslation
)
+
+# Optional ocloc integration for MLIRXeVMTarget. When find_package(ocloc)
+# reports OCLOC_FOUND, the ocloc include directory and library are added to
+# MLIRXeVMTarget; otherwise a warning is printed. In both cases
+# MLIR_XEVM_OCLOC_AVAILABLE is defined (as 1 or 0) on obj.MLIRXeVMTarget.
+find_package(ocloc)
+if (NOT OCLOC_FOUND)
+  set(mlir_xevm_ocloc_available 0)
+  message(WARNING "ocloc not found, MLIRXeVMTarget will not be able to use ocloc for native binary compilation.")
+else()
+  set(mlir_xevm_ocloc_available 1)
+  target_include_directories(MLIRXeVMTarget PRIVATE "${OCLOC_INCLUDE_DIR}")
+  target_link_libraries(MLIRXeVMTarget PRIVATE "${OCLOC_LIBRARY}")
+endif()
+target_compile_definitions(obj.MLIRXeVMTarget
+  PRIVATE
+  MLIR_XEVM_OCLOC_AVAILABLE=${mlir_xevm_ocloc_available}
+)
+
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 83eec5e9d5549..ffdd4a26f4ecc 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -28,18 +28,17 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Config/Targets.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
+#if MLIR_XEVM_OCLOC_AVAILABLE
+#include <ocloc_api.h>
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
#include <cstdint>
#include <cstdlib>
@@ -107,59 +106,28 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
// - `ocloc` tool can be "queried" from within MLIR.
-FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
- StringRef asmStr, StringRef inputFormat = "-spirv_input") {
- using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
- // Find the `ocloc` tool.
- std::optional<std::string> oclocCompiler = findTool("ocloc");
- if (!oclocCompiler)
- return failure();
+#if MLIR_XEVM_OCLOC_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+ StringRef inputFormat) {
Location loc = getGPUModuleOp().getLoc();
- std::string basename = llvm::formatv(
- "mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
+ std::string asmFname = llvm::formatv(
+ "mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
getTarget().getTriple(), getTarget().getChip());
-
- auto createTemp = [&](StringRef name,
- StringRef suffix) -> FailureOr<TmpFile> {
- llvm::SmallString<128> filePath;
- if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, filePath))
- return getGPUModuleOp().emitError()
- << "Couldn't create the temp file: `" << filePath
- << "`, error message: " << ec.message();
-
- return TmpFile(filePath, llvm::FileRemover(filePath.c_str()));
- };
- // Create temp file
- FailureOr<TmpFile> asmFile = createTemp(basename, "asm");
- FailureOr<TmpFile> binFile = createTemp(basename, "");
- FailureOr<TmpFile> logFile = createTemp(basename, "log");
- if (failed(logFile) || failed(asmFile) || failed(binFile))
- return failure();
- // Dump the assembly to a temp file
- std::error_code ec;
- {
- llvm::raw_fd_ostream asmStream(asmFile->first, ec);
- if (ec)
- return emitError(loc) << "Couldn't open the file: `" << asmFile->first
- << "`, error message: " << ec.message();
-
- asmStream << asmStr;
- if (asmStream.has_error())
- return emitError(loc)
- << "An error occurred while writing the assembly to: `"
- << asmFile->first << "`.";
-
- asmStream.flush();
- }
// Set cmd options
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
targetOptions.tokenizeCmdOptions();
// Example: --gpu-module-to-binary="opts='opt1 opt2'"
const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
- SmallVector<StringRef, 12> oclocArgs(
- {"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
- getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
- "-options", cmdOptsStr});
+ std::vector<std::string> oclocArgs = {"ocloc",
+ "compile",
+ "-file",
+ asmFname,
+ inputFormat.str(),
+ "-device",
+ getTarget().getChip().str(),
+ "-options",
+ cmdOptsStr};
// Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
@@ -170,64 +138,66 @@ FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
llvm::dbgs() << "\n";
});
#undef DEBUG_TYPE
- // Helper function for printing tool error logs.
- std::string message;
- auto emitLogError =
- [&](StringRef toolName) -> FailureOr<SmallVector<char, 0>> {
- if (message.empty()) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
- llvm::MemoryBuffer::getFile(logFile->first);
- if (toolStderr)
- return emitError(loc) << toolName << " invocation failed. Log:\n"
- << toolStderr->get()->getBuffer();
- else
- return emitError(loc) << toolName << " invocation failed.";
+
+ std::vector<const char *> argv;
+ for (const auto &str : oclocArgs)
+ argv.push_back(str.c_str());
+
+ uint32_t numSources = 1;
+ const uint8_t *dataSources[1] = {
+ reinterpret_cast<const uint8_t *>(asmStr.data())};
+ const uint64_t lenSources[1] = {asmStr.size()};
+ const char *nameSources[1] = {asmFname.c_str()};
+
+ uint32_t outputs_num = 0;
+ uint8_t **outputs = nullptr;
+ uint64_t *output_length = nullptr;
+ char **output_names = nullptr;
+ auto _ = llvm::scope_exit([&]() {
+ oclocFreeOutput(&outputs_num, &outputs, &output_length, &output_names);
+ });
+
+ int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
+ numSources, dataSources, lenSources, nameSources, 0,
+ nullptr, nullptr, nullptr, &outputs_num, &outputs,
+ &output_length, &output_names);
+
+ if (err != OCLOC_SUCCESS) {
+ emitError(loc) << "`oclocInvoke` failed or produced no output, error: "
+ << err;
+ for (uint32_t i = 0; i < outputs_num; ++i) {
+ if (llvm::StringRef(output_names[i]).ends_with(".log")) {
+ emitError(loc) << "Compiler log:\n";
+ emitError(loc) << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+ output_length[i])
+ << "\n";
+ }
}
- return emitError(loc) << toolName
- << " invocation failed, error message: " << message;
- };
- std::optional<StringRef> redirects[] = {
- std::nullopt,
- logFile->first,
- logFile->first,
- };
- // Invoke ocloc.
- if (llvm::sys::ExecuteAndWait(oclocCompiler.value(), oclocArgs, std::nullopt,
- redirects, 0, 0, &message))
- return emitLogError("`ocloc`");
- binFile->first.append(".bin");
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
- llvm::MemoryBuffer::getFile(binFile->first);
- if (!binaryBuffer)
- return emitError(loc) << "Couldn't open the file: `" << binFile->first
- << "`, error message: "
- << binaryBuffer.getError().message();
-
- StringRef bin = (*binaryBuffer)->getBuffer();
- return SmallVector<char, 0>(bin.begin(), bin.end());
-}
+ return failure();
+ }
-std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
- // 1. Check the toolkit path given in the command line.
- StringRef pathRef = targetOptions.getToolkitPath();
- SmallVector<char, 256> path;
- if (!pathRef.empty()) {
- path.insert(path.begin(), pathRef.begin(), pathRef.end());
- llvm::sys::path::append(path, "bin", tool);
- if (llvm::sys::fs::can_execute(path))
- return StringRef(path.data(), path.size()).str();
+ SmallVector<char, 0> binStr;
+ for (uint32_t i = 0; i < outputs_num; ++i) {
+ if (llvm::StringRef(output_names[i]).ends_with(".bin")) {
+ char *outBegin = reinterpret_cast<char *>(outputs[i]);
+ char *outEnd = outBegin + output_length[i];
+ binStr.assign(outBegin, outEnd);
+ break;
+ }
}
- // 2. Check PATH.
- if (std::optional<std::string> toolPath =
- llvm::sys::Process::FindInEnvPath("PATH", tool))
- return *toolPath;
-
- getGPUModuleOp().emitError()
- << "Couldn't find the `" << tool
- << "` binary. Please specify the toolkit "
- "path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
- return std::nullopt;
+ if (binStr.empty())
+ return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+
+ return binStr;
+}
+#else // MLIR_XEVM_OCLOC_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+ StringRef inputFormat) {
+ return getGPUModuleOp().emitError()
+ << "Native binary cannot be AOT compiled without ocloc.";
}
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
namespace {
class SPIRVSerializer : public SerializeGPUModuleBase {
>From 52ad6d457c0d121e951189e67966721dcfe66984 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:06:09 +0000
Subject: [PATCH 2/5] Keep the llvm::sys::ExecuteAndWait based `ocloc` tool
invocation as a fallback.
There might be a scenario where only the `ocloc` binary is available,
but not the library and include files. Use the fallback method
in that scenario.
---
mlir/include/mlir/Target/LLVM/XeVM/Utils.h | 17 +-
mlir/lib/Target/LLVM/XeVM/Target.cpp | 214 +++++++++++++++++----
2 files changed, 195 insertions(+), 36 deletions(-)
diff --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 02e766ff71cf5..4a33992004ab6 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -39,7 +39,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// Returns the gpu module being serialized.
gpu::GPUModuleOp getGPUModuleOp();
- /// Compiles to native code using `ocloc`.
+ /// Compiles to native code using `ocloc` (API or tool).
FailureOr<SmallVector<char, 0>> compileToBinary(StringRef asmStr,
StringRef inputFormat);
@@ -51,6 +51,21 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// a Resource blob pointing to the LLVM bitcode in-memory.
SmallVector<Attribute> librariesToLink;
+ /// Returns the path to the tool used for serialization.
+ std::optional<std::string> findTool(StringRef tool);
+
+ /// Compiles to native code using the `ocloc` command-line tool, communicating
+ /// through temporary files.
+ FailureOr<SmallVector<char, 0>>
+ compileToBinaryViaOclocTool(StringRef asmStr, StringRef inputFormat);
+
+ /// Compiles to native code using the `ocloc` shared library API, in-process,
+ /// without temporary files. Only available when the library is linked in.
+#if MLIR_XEVM_OCLOC_AVAILABLE
+ FailureOr<SmallVector<char, 0>>
+ compileToBinaryViaLibocloc(StringRef asmStr, StringRef inputFormat);
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
/// GPU compilation target options.
gpu::TargetOptions targetOptions;
};
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index ffdd4a26f4ecc..454b9f786fd4f 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -26,6 +26,13 @@
#include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/ScopeExit.h"
@@ -106,15 +113,21 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
// - `ocloc` tool can be "queried" from within MLIR.
+
+// ----------------------------------------------------------------------------
+// compile via the ocloc shared-library API (in-process, no temp files). Only
+// compiled when the library is available at build time.
+// ----------------------------------------------------------------------------
#if MLIR_XEVM_OCLOC_AVAILABLE
FailureOr<SmallVector<char, 0>>
-SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
- StringRef inputFormat) {
+SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
+ StringRef inputFormat) {
Location loc = getGPUModuleOp().getLoc();
std::string asmFname = llvm::formatv(
"mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
getTarget().getTriple(), getTarget().getChip());
- // Set cmd options
+
+ // Build command-line options.
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
targetOptions.tokenizeCmdOptions();
// Example: --gpu-module-to-binary="opts='opt1 opt2'"
@@ -139,66 +152,197 @@ SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
});
#undef DEBUG_TYPE
+ // Build a plain argv array expected by oclocInvoke.
std::vector<const char *> argv;
for (const auto &str : oclocArgs)
argv.push_back(str.c_str());
- uint32_t numSources = 1;
+ // Wire up in-memory source file.
const uint8_t *dataSources[1] = {
reinterpret_cast<const uint8_t *>(asmStr.data())};
const uint64_t lenSources[1] = {asmStr.size()};
const char *nameSources[1] = {asmFname.c_str()};
- uint32_t outputs_num = 0;
+ uint32_t outputsNum = 0;
uint8_t **outputs = nullptr;
- uint64_t *output_length = nullptr;
- char **output_names = nullptr;
- auto _ = llvm::scope_exit([&]() {
- oclocFreeOutput(&outputs_num, &outputs, &output_length, &output_names);
+ uint64_t *outputLengths = nullptr;
+ char **outputNames = nullptr;
+ // Ensure ocloc output buffers are always freed on exit.
+ auto freeOutputs = llvm::scope_exit([&]() {
+ oclocFreeOutput(&outputsNum, &outputs, &outputLengths, &outputNames);
});
int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
- numSources, dataSources, lenSources, nameSources, 0,
- nullptr, nullptr, nullptr, &outputs_num, &outputs,
- &output_length, &output_names);
+ /*numSources=*/1, dataSources, lenSources, nameSources,
+ /*numHeaders=*/0, nullptr, nullptr, nullptr,
+ &outputsNum, &outputs, &outputLengths, &outputNames);
if (err != OCLOC_SUCCESS) {
- emitError(loc) << "`oclocInvoke` failed or produced no output, error: "
- << err;
- for (uint32_t i = 0; i < outputs_num; ++i) {
- if (llvm::StringRef(output_names[i]).ends_with(".log")) {
- emitError(loc) << "Compiler log:\n";
- emitError(loc) << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
- output_length[i])
- << "\n";
- }
+ emitError(loc) << "`oclocInvoke` failed, error code: " << err;
+ // Emit any compiler log that ocloc produced.
+ for (uint32_t i = 0; i < outputsNum; ++i) {
+ if (llvm::StringRef(outputNames[i]).ends_with(".log"))
+ emitError(loc) << "Compiler log:\n"
+ << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+ outputLengths[i]);
}
return failure();
}
- SmallVector<char, 0> binStr;
- for (uint32_t i = 0; i < outputs_num; ++i) {
- if (llvm::StringRef(output_names[i]).ends_with(".bin")) {
- char *outBegin = reinterpret_cast<char *>(outputs[i]);
- char *outEnd = outBegin + output_length[i];
- binStr.assign(outBegin, outEnd);
- break;
+ // Find and return the .bin output.
+ for (uint32_t i = 0; i < outputsNum; ++i) {
+ if (llvm::StringRef(outputNames[i]).ends_with(".bin")) {
+ char *begin = reinterpret_cast<char *>(outputs[i]);
+ return SmallVector<char, 0>(begin, begin + outputLengths[i]);
+ }
+ }
+ return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+}
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Compile by spawning the `ocloc` command-line tool as a process,
+// communicating through temporary files. Acts as a fallback when the shared
+// library is not available.
+// ----------------------------------------------------------------------------
+/// Compiles `asmStr` to a native binary by running the external `ocloc`
+/// executable. The source is written to a temporary file, `ocloc compile` is
+/// invoked with `inputFormat`, the target chip and the user command-line
+/// options, and the produced `.bin` file is read back.
+///
+/// Returns the binary contents on success. On any failure (tool not found,
+/// temp-file or I/O error, non-zero tool result, missing output) an error is
+/// emitted at the GPU module's location and `failure()` is returned.
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaOclocTool(StringRef asmStr,
+                                                    StringRef inputFormat) {
+  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
+
+  // Locate the `ocloc` executable. `findTool` emits its own diagnostic when
+  // the lookup fails, so no additional error is reported here.
+  std::optional<std::string> oclocPath = findTool("ocloc");
+  if (!oclocPath)
+    return failure();
+
+  Location loc = getGPUModuleOp().getLoc();
+  std::string basename = llvm::formatv(
+      "mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
+      getTarget().getTriple(), getTarget().getChip());
+
+  // Helper: create a named temporary file, returning path + auto-remover.
+  auto createTemp = [&](StringRef name,
+                        StringRef suffix) -> std::optional<TmpFile> {
+    llvm::SmallString<128> path;
+    if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, path)) {
+      emitError(loc) << "Couldn't create temp file `" << path
+                     << "`: " << ec.message();
+      return std::nullopt;
+    }
+    return TmpFile(path, llvm::FileRemover(path.c_str()));
+  };
+
+  std::optional<TmpFile> asmFile = createTemp(basename, "asm");
+  std::optional<TmpFile> binFile = createTemp(basename, "");
+  std::optional<TmpFile> logFile = createTemp(basename, "log");
+  if (!asmFile || !binFile || !logFile)
+    return failure();
+
+  // The binary is read back from `<output>.bin` (ocloc appends ".bin" to the
+  // base name). That path differs from the placeholder file owned by
+  // `binFile`, so it needs its own remover; otherwise the output would be
+  // left behind in the temp directory. Removing a file that was never
+  // created is harmless.
+  llvm::SmallString<128> binOutputPath = binFile->first;
+  binOutputPath.append(".bin");
+  llvm::FileRemover binOutputRemover(binOutputPath.str());
+
+  // Write the assembly source to a temp file.
+  {
+    std::error_code ec;
+    llvm::raw_fd_ostream asmStream(asmFile->first, ec);
+    if (ec) {
+      emitError(loc) << "Couldn't open `" << asmFile->first
+                     << "`: " << ec.message();
+      return failure();
+    }
+    asmStream << asmStr;
+    if (asmStream.has_error()) {
+      emitError(loc) << "Error writing assembly to `" << asmFile->first << "`";
+      return failure();
+    }
+    asmStream.flush();
+  }
+
+  // Build command-line options.
+  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+      targetOptions.tokenizeCmdOptions();
+  const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+
+  SmallVector<StringRef, 12> oclocArgs(
+      {"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
+       getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
+       "-options", cmdOptsStr});
+
+  // Dump tool invocation commands.
+#define DEBUG_TYPE "serialize-to-binary"
+  LLVM_DEBUG({
+    llvm::dbgs() << "Tool invocation for module: "
+                 << getGPUModuleOp().getNameAttr() << "\n";
+    llvm::interleave(oclocArgs, llvm::dbgs(), " ");
+    llvm::dbgs() << "\n";
+  });
+#undef DEBUG_TYPE
+
+  // Redirect stdout/stderr to the log temp file.
+  std::optional<StringRef> redirects[] = {std::nullopt, logFile->first,
+                                          logFile->first};
+
+  std::string errorMsg;
+  if (llvm::sys::ExecuteAndWait(*oclocPath, oclocArgs, std::nullopt, redirects,
+                                0, 0, &errorMsg)) {
+    // Prefer a structured error message; otherwise dump the log file.
+    if (!errorMsg.empty()) {
+      emitError(loc) << "`ocloc` invocation failed: " << errorMsg;
+    } else if (auto log = llvm::MemoryBuffer::getFile(logFile->first)) {
+      emitError(loc) << "`ocloc` invocation failed. Log:\n"
+                     << (*log)->getBuffer();
+    } else {
+      emitError(loc) << "`ocloc` invocation failed (no log available)";
+    }
+    return failure();
+  }
+
+  // Read back the binary output.
+  auto binaryBuffer = llvm::MemoryBuffer::getFile(binOutputPath);
+  if (!binaryBuffer) {
+    emitError(loc) << "Couldn't open binary output `" << binOutputPath
+                   << "`: " << binaryBuffer.getError().message();
+    return failure();
+  }
+  StringRef bin = (*binaryBuffer)->getBuffer();
+  return SmallVector<char, 0>(bin.begin(), bin.end());
+}
-#else // MLIR_XEVM_OCLOC_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Public entry-point: prefer the in-process library path; fall back to the
+// external tool when the library is not available.
+// ----------------------------------------------------------------------------
FailureOr<SmallVector<char, 0>>
SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
StringRef inputFormat) {
- return getGPUModuleOp().emitError()
- << "Native binary cannot be AOT compiled without ocloc.";
+#if MLIR_XEVM_OCLOC_AVAILABLE
+ return compileToBinaryViaLibocloc(asmStr, inputFormat);
+#else
+ return compileToBinaryViaOclocTool(asmStr, inputFormat);
+#endif
}
-#endif // MLIR_XEVM_OCLOC_AVAILABLE
+/// Looks up the executable `tool`.
+///
+/// The toolkit path from the target options is tried first (as
+/// `<toolkit-path>/bin/<tool>`), then the `PATH` environment variable.
+/// Returns the resolved path, or `std::nullopt` after emitting an error on
+/// the GPU module when neither lookup succeeds.
+std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
+  // 1. Check the toolkit path given in the command line.
+  StringRef toolkitPath = targetOptions.getToolkitPath();
+  if (!toolkitPath.empty()) {
+    llvm::SmallString<256> candidate(toolkitPath);
+    llvm::sys::path::append(candidate, "bin", tool);
+    if (llvm::sys::fs::can_execute(candidate))
+      return candidate.str().str();
+  }
+
+  // 2. Check PATH.
+  if (std::optional<std::string> toolPath =
+          llvm::sys::Process::FindInEnvPath("PATH", tool))
+    return *toolPath;
+
+  // Neither location has the tool: report it (the message previously ended
+  // with an unmatched backtick after `$PATH`).
+  getGPUModuleOp().emitError()
+      << "Couldn't find the `" << tool
+      << "` binary. Please specify the toolkit "
+         "path via GpuModuleToBinaryPass or add the compiler to $PATH.";
+  return std::nullopt;
+}
namespace {
class SPIRVSerializer : public SerializeGPUModuleBase {
public:
>From cd3edb197827ea9cc9bad2a2cc8ce2ebd1dce6c3 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:11:05 +0000
Subject: [PATCH 3/5] Consolidate header files.
---
mlir/lib/Target/LLVM/XeVM/Target.cpp | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 454b9f786fd4f..0275c2867bb6e 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVM/XeVM/Target.h"
-
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
@@ -25,6 +24,9 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Config/Targets.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
@@ -33,14 +35,9 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/Config/Targets.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#if MLIR_XEVM_OCLOC_AVAILABLE
#include <ocloc_api.h>
>From e5a2631a338377df026502483631264c20707d00 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:15:08 +0000
Subject: [PATCH 4/5] Fix a comment.
---
mlir/lib/Target/LLVM/XeVM/Target.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 0275c2867bb6e..5777796a75f58 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -106,11 +106,6 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
-// There is 1 way to finalize IL to native code: IGC
-// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
-// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
-// - `ocloc` tool can be "queried" from within MLIR.
-
// ----------------------------------------------------------------------------
// compile via the ocloc shared-library API (in-process, no temp files). Only
// compiled when the library is available at build time.
@@ -397,6 +392,10 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
} // namespace llvm
#endif
+// There is 1 way to finalize IL to native code: IGC
+// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
+// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
+// - `ocloc` tool can be "queried" from within MLIR.
FailureOr<SmallVector<char, 0>>
SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
#define DEBUG_TYPE "serialize-to-llvm"
>From cf6a1d3821ab0e50da7551b371674d7c91c04859 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Thu, 9 Apr 2026 17:57:24 +0000
Subject: [PATCH 5/5] Address review comments.
Replace the usage of `ocloc_api.h` with external declarations
of APIs.
Update error messages to differentiate between ocloc and libocloc.
---
mlir/lib/Target/LLVM/XeVM/Target.cpp | 43 ++++++++++++++++++++++++----
1 file changed, 37 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 5777796a75f58..1e707d16cde45 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -39,16 +39,47 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#if MLIR_XEVM_OCLOC_AVAILABLE
-#include <ocloc_api.h>
-#endif // MLIR_XEVM_OCLOC_AVAILABLE
-
#include <cstdint>
#include <cstdlib>
using namespace mlir;
using namespace mlir::xevm;
+#if MLIR_XEVM_OCLOC_AVAILABLE
+// The Intel compute runtime ships libocloc in its distribution, but not
+// <ocloc_api.h>. Hence forward declarations of the ocloc shared-library APIs
+// are needed. These replace the inclusion of <ocloc_api.h> so that the header
+// is not a build-time requirement; the symbols are resolved at link/load time
+// via the ocloc shared library.
+//
+// NOTE(review): these declarations must stay in sync with the real header;
+// the fixed-width `uint32_t` counts below mirror the types used at the call
+// site in this file -- confirm against the installed ocloc_api.h.
+extern "C" {
+
+// Return code indicating successful ocloc compilation.
+// Matches the OCLOC_SUCCESS enumerator in the real header (value 0).
+enum OclocErrorCode : int { OCLOC_SUCCESS = 0 };
+
+// Drives an in-process ocloc compilation.
+//   argv / numArgs - standard ocloc command-line arguments.
+//   numSources     - number of in-memory source files (typically 1).
+//   dataSources    - array of pointers to source byte buffers.
+//   lenSources     - byte length of each source buffer.
+//   nameSources    - file name associated with each source buffer.
+//   numHeaders / dataHeaders / lenHeaders / nameHeaders - optional headers.
+//   numOutputs / dataOutputs / lenOutputs / nameOutputs - [out] results.
+int oclocInvoke(unsigned numArgs, const char **argv, uint32_t numSources,
+                const uint8_t **dataSources, const uint64_t *lenSources,
+                const char **nameSources, uint32_t numHeaders,
+                const uint8_t **dataHeaders, const uint64_t *lenHeaders,
+                const char **nameHeaders, uint32_t *numOutputs,
+                uint8_t ***dataOutputs, uint64_t **lenOutputs,
+                char ***nameOutputs);
+
+// Releases output buffers previously populated by oclocInvoke.
+int oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs,
+                    uint64_t **lenOutputs, char ***nameOutputs);
+
+} // extern "C"
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
namespace {
// XeVM implementation of the gpu:TargetAttrInterface.
class XeVMTargetAttrImpl
@@ -137,7 +168,7 @@ SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
// Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
LLVM_DEBUG({
- llvm::dbgs() << "Tool invocation for module: "
+ llvm::dbgs() << "libocloc invocation for module: "
<< getGPUModuleOp().getNameAttr() << "\n";
llvm::interleave(oclocArgs, llvm::dbgs(), " ");
llvm::dbgs() << "\n";
@@ -170,7 +201,7 @@ SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
&outputsNum, &outputs, &outputLengths, &outputNames);
if (err != OCLOC_SUCCESS) {
- emitError(loc) << "`oclocInvoke` failed, error code: " << err;
+ emitError(loc) << "libocloc: `oclocInvoke` failed, error code: " << err;
// Emit any compiler log that ocloc produced.
for (uint32_t i = 0; i < outputsNum; ++i) {
if (llvm::StringRef(outputNames[i]).ends_with(".log"))
More information about the Mlir-commits
mailing list