[Mlir-commits] [mlir] 7b4c9bb - [mlir][XeVM] Use libocloc API for binary generation. (#188353)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Apr 13 08:44:30 PDT 2026
Author: Md Abdullah Shahneous Bari
Date: 2026-04-13T10:44:26-05:00
New Revision: 7b4c9bb2069536e0df18597795b858e18a1cbaaf
URL: https://github.com/llvm/llvm-project/commit/7b4c9bb2069536e0df18597795b858e18a1cbaaf
DIFF: https://github.com/llvm/llvm-project/commit/7b4c9bb2069536e0df18597795b858e18a1cbaaf.diff
LOG: [mlir][XeVM] Use libocloc API for binary generation. (#188353)
This PR improves native binary generation by avoiding
`llvm::sys::ExecuteAndWait` call for ocloc and instead
leveraging `oclocInvoke()` that consumes an in-memory SPIR-V string.
Co-authored-by: Artem Kroviakov <artem.kroviakov at intel.com>
Added:
mlir/cmake/modules/Findocloc.cmake
Modified:
mlir/include/mlir/Target/LLVM/XeVM/Utils.h
mlir/lib/Target/LLVM/CMakeLists.txt
mlir/lib/Target/LLVM/XeVM/Target.cpp
Removed:
################################################################################
diff --git a/mlir/cmake/modules/Findocloc.cmake b/mlir/cmake/modules/Findocloc.cmake
new file mode 100644
index 0000000000000..5b433ff1fe5e4
--- /dev/null
+++ b/mlir/cmake/modules/Findocloc.cmake
@@ -0,0 +1,76 @@
+# Findocloc.cmake
+
+function(find_ocloc)
+ message(STATUS "Searching for ocloc")
+
+ if(WIN32)
+ if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ set(OCLOC_SUFFIX "64")
+ else()
+ set(OCLOC_SUFFIX "32")
+ endif()
+
+ set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/bin")
+ set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/lib")
+ else()
+ set(OCLOC_SUFFIX "")
+
+ set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}/bin")
+ set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}/lib")
+
+ if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ list(APPEND OCLOC_LIB_PATHS
+ "${OCLOC_PACKAGE_DIR}/lib64"
+ "${OCLOC_PACKAGE_DIR}/lib/x86_64-linux-gnu")
+ endif()
+ endif()
+
+ # Search for ocloc executable
+ find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+ PATHS ${OCLOC_EXE_PATHS} NO_DEFAULT_PATH)
+ find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+ PATHS ${OCLOC_EXE_PATHS})
+
+ # Search for ocloc library
+ find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+ PATHS ${OCLOC_LIB_PATHS} NO_DEFAULT_PATH)
+ find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+ PATHS ${OCLOC_LIB_PATHS})
+
+ # Check if all components are found
+ if(OCLOC_EXECUTABLE AND OCLOC_LIBRARY)
+ set(OCLOC_FOUND TRUE)
+ else()
+ set(OCLOC_FOUND FALSE)
+ endif()
+
+ # Provide the results to the user
+ if(OCLOC_FOUND)
+ message(STATUS "Found ocloc executable: ${OCLOC_EXECUTABLE}")
+ message(STATUS "Found ocloc library: ${OCLOC_LIBRARY}")
+ else()
+ message(STATUS "ocloc not found")
+ endif()
+
+ # Set the variables for the user
+ set(OCLOC_EXECUTABLE ${OCLOC_EXECUTABLE} CACHE FILEPATH "Path to ocloc executable")
+ set(OCLOC_LIBRARY ${OCLOC_LIBRARY} CACHE FILEPATH "Path to ocloc library")
+ set(OCLOC_FOUND ${OCLOC_FOUND} CACHE BOOL "ocloc found")
+
+ # Only create imported targets when the underlying artifact was actually
+ # found, to avoid CMake errors from targets with empty IMPORTED_LOCATION.
+ if(OCLOC_EXECUTABLE)
+ add_executable(ocloc IMPORTED)
+ set_property(TARGET ocloc PROPERTY IMPORTED_LOCATION "${OCLOC_EXECUTABLE}")
+ endif()
+
+ if(OCLOC_LIBRARY)
+ add_library(libocloc SHARED IMPORTED)
+ set_target_properties(libocloc PROPERTIES
+ IMPORTED_LOCATION "${OCLOC_LIBRARY}")
+ endif()
+endfunction()
+
+# Call the function to find ocloc
+find_ocloc()
+
diff --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 455c8303a9aa8..b5fa0349839ee 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -39,7 +39,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// Returns the gpu module being serialized.
gpu::GPUModuleOp getGPUModuleOp();
- /// Compiles to native code using `ocloc`.
+ /// Compiles to native code using `ocloc` (API or tool).
FailureOr<SmallVector<char, 0>> compileToBinary(StringRef asmStr,
StringRef inputFormat);
@@ -54,6 +54,18 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// Returns the path to the tool used for serialization.
std::optional<std::string> findTool(StringRef tool);
+ /// Compiles to native code using the `ocloc` command-line tool, communicating
+ /// through temporary files.
+ FailureOr<SmallVector<char, 0>>
+ compileToBinaryViaOclocTool(StringRef asmStr, StringRef inputFormat);
+
+ /// Compiles to native code using the `ocloc` shared library API, in-process,
+ /// without temporary files. Only available when the library is linked in.
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+ FailureOr<SmallVector<char, 0>>
+ compileToBinaryViaLibocloc(StringRef asmStr, StringRef inputFormat);
+#endif // MLIR_XEVM_OCLOC_LIB_AVAILABLE
+
/// GPU compilation target options.
gpu::TargetOptions targetOptions;
};
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 94660e231888b..a6f1436e1d3a9 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -83,7 +83,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
# Downstream projects can modify this path and use it in CMake. For example:
# add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
- # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
+ # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
# where `...` is to be replaced with the path to the library.
set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
# Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
@@ -101,7 +101,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
endif()
add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
- set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
+ set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
endif()
else()
@@ -141,7 +141,7 @@ if (MLIR_NVVM_EMBED_LIBDEVICE)
"Requested using the `nvptxcompiler` library backend but it couldn't be found.")
endif()
endif()
-
+
embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
add_mlir_library(MLIRNVVMLibdevice
${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
@@ -234,3 +234,20 @@ add_mlir_dialect_library(MLIRXeVMTarget
MLIRTargetLLVM
MLIRXeVMToLLVMIRTranslation
)
+
+find_package(ocloc)
+if (OCLOC_FOUND)
+ target_include_directories(MLIRXeVMTarget PRIVATE "${OCLOC_INCLUDE_DIR}")
+ target_link_libraries(MLIRXeVMTarget PRIVATE "${OCLOC_LIBRARY}")
+ target_compile_definitions(obj.MLIRXeVMTarget
+ PRIVATE
+ MLIR_XEVM_OCLOC_LIB_AVAILABLE=1
+ )
+else()
+ target_compile_definitions(obj.MLIRXeVMTarget
+ PRIVATE
+ MLIR_XEVM_OCLOC_LIB_AVAILABLE=0
+ )
+ message(WARNING "ocloc not found, MLIRXeVMTarget will not be able to use ocloc for native binary compilation.")
+endif()
+
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index f8f09258fd861..28c654f3d7c9a 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVM/XeVM/Target.h"
-
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
@@ -25,11 +24,10 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Config/Targets.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
@@ -39,6 +37,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <cstdint>
#include <cstdlib>
@@ -46,6 +45,96 @@
using namespace mlir;
using namespace mlir::xevm;
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+// Intel compute runtime includes libocloc in the distribution, but
+// <ocloc_api.h> isn't included. Hence forward declarations for the libocloc
+// shared-library APIs are needed. These replace the inclusion of <ocloc_api.h>
+// so that the header is not a build-time requirement; the symbols are resolved
+// at link/load time via the ocloc shared library.
+extern "C" {
+
+// Return code indicating successful ocloc compilation.
+// Matches the OCLOC_SUCCESS enumerator in the real header (value 0).
+enum OclocErrorCode : int { OCLOC_SUCCESS = 0 };
+
+// Drives an in-process ocloc compilation.
+/// Invokes ocloc API using C interface. Supported commands match
+/// the functionality of ocloc executable (check ocloc's "help"
+/// for reference : shared/offline_compiler/source/ocloc_api.cpp).
+///
+/// numArgs and argv params represent the command line.
+/// Remaining params represent I/O.
+/// Output params should be freed using oclocFreeOutput when
+/// no longer needed.
+/// List and names of outputs match outputs of ocloc executable.
+///
+/// \param numArgs is the number of arguments to pass to ocloc
+///
+/// \param argv is an array of arguments to be passed to ocloc
+///
+/// \param numSources is the number of in-memory representations
+/// of source files to be passed to ocloc
+///
+/// \param dataSources is an array of in-memory representations
+/// of source files to be passed to ocloc
+///
+/// \param lenSources is an array of sizes of in-memory representations
+/// of source files passed to ocloc as dataSources
+///
+/// \param nameSources is an array of names of in-memory representations
+/// of source files passed to ocloc as dataSources
+///
+/// \param numInputHeaders is the number of in-memory representations
+/// of header files to be passed to ocloc
+///
+/// \param dataInputHeaders is an array of in-memory representations
+/// of header files to be passed to ocloc
+///
+/// \param lenInputHeaders is an array of sizes of in-memory representations
+/// of header files passed to ocloc as dataInputHeaders
+///
+/// \param nameInputHeaders is an array of names of in-memory representations
+/// of header files passed to ocloc as dataInputHeaders
+///
+/// \param numOutputs returns the number of outputs
+///
+/// \param dataOutputs returns an array of in-memory representations
+/// of output files
+///
+/// \param lenOutputs returns an array of sizes of in-memory representations
+/// of output files
+///
+/// \param nameOutputs returns an array of names of in-memory representations
+/// of output files. Special name stdout.log describes output that contains
+/// messages generated by ocloc (e.g. compiler errors/warnings)
+///
+/// \returns 0 on success. Returns non-0 in case of failure.
+
+int oclocInvoke(unsigned numArgs, const char **argv, unsigned numSources,
+ const uint8_t **dataSources, const uint64_t *lenSources,
+ const char **nameSources, unsigned numHeaders,
+ const uint8_t **dataHeaders, const uint64_t *lenHeaders,
+ const char **nameHeaders, unsigned *numOutputs,
+ uint8_t ***dataOutputs, uint64_t **lenOutputs,
+ char ***nameOutputs);
+
+/// Frees results of oclocInvoke
+///
+/// \param numOutputs is number of outputs as returned by oclocInvoke
+///
+/// \param dataOutputs is array of outputs as returned by oclocInvoke
+///
+/// \param lenOutputs is array of sizes of outputs as returned by oclocInvoke
+///
+/// \param nameOutputs is array of names of outputs as returned by oclocInvoke
+///
+/// \returns 0 on success. Returns non-0 in case of failure.
+int oclocFreeOutput(unsigned *numOutputs, uint8_t ***dataOutputs,
+ uint64_t **lenOutputs, char ***nameOutputs);
+
+} // extern "C"
+#endif // MLIR_XEVM_OCLOC_LIB_AVAILABLE
+
namespace {
// XeVM implementation of the gpu:TargetAttrInterface.
class XeVMTargetAttrImpl
@@ -103,65 +192,160 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
-// There is 1 way to finalize IL to native code: IGC
-// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
-// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
-// - `ocloc` tool can be "queried" from within MLIR.
-FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
- StringRef asmStr, StringRef inputFormat = "-spirv_input") {
+// ----------------------------------------------------------------------------
+// Compile via the ocloc shared-library API (in-process, no temp files). Only
+// compiled when the library is available at build time.
+// ----------------------------------------------------------------------------
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
+ StringRef inputFormat) {
+ Location loc = getGPUModuleOp().getLoc();
+ std::string asmFname = llvm::formatv(
+ "mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
+ getTarget().getTriple(), getTarget().getChip());
+
+ // Build command-line options.
+ std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+ targetOptions.tokenizeCmdOptions();
+ // Example: --gpu-module-to-binary="opts='opt1 opt2'"
+ const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+ std::vector<std::string> oclocArgs = {"ocloc",
+ "compile",
+ "-file",
+ asmFname,
+ inputFormat.str(),
+ "-device",
+ getTarget().getChip().str(),
+ "-options",
+ cmdOptsStr};
+
+// Dump tool invocation commands.
+#define DEBUG_TYPE "serialize-to-binary"
+ LLVM_DEBUG({
+ llvm::dbgs() << "libocloc invocation for module: "
+ << getGPUModuleOp().getNameAttr() << "\n";
+ llvm::interleave(oclocArgs, llvm::dbgs(), " ");
+ llvm::dbgs() << "\n";
+ });
+#undef DEBUG_TYPE
+
+ // Build a plain argv array expected by oclocInvoke.
+ std::vector<const char *> argv;
+ for (const auto &str : oclocArgs)
+ argv.push_back(str.c_str());
+
+ // Wire up in-memory source file.
+ const uint8_t *dataSources[1] = {
+ reinterpret_cast<const uint8_t *>(asmStr.data())};
+ const uint64_t lenSources[1] = {asmStr.size()};
+ const char *nameSources[1] = {asmFname.c_str()};
+
+ uint32_t outputsNum = 0;
+ uint8_t **outputs = nullptr;
+ uint64_t *outputLengths = nullptr;
+ char **outputNames = nullptr;
+ // Ensure ocloc output buffers are always freed on exit.
+ auto freeOutputs = llvm::scope_exit([&]() {
+ oclocFreeOutput(&outputsNum, &outputs, &outputLengths, &outputNames);
+ });
+
+ int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
+ /*numSources=*/1, dataSources, lenSources, nameSources,
+ /*numHeaders=*/0, nullptr, nullptr, nullptr,
+ &outputsNum, &outputs, &outputLengths, &outputNames);
+
+ if (err != OCLOC_SUCCESS) {
+ emitError(loc) << "libocloc: `oclocInvoke` failed, error code: " << err;
+ // Emit any compiler log that ocloc produced.
+ for (uint32_t i = 0; i < outputsNum; ++i) {
+ if (llvm::StringRef(outputNames[i]).ends_with(".log"))
+ emitError(loc) << "Compiler log:\n"
+ << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+ outputLengths[i]);
+ }
+ return failure();
+ }
+
+ // Find and return the .bin output.
+ for (uint32_t i = 0; i < outputsNum; ++i) {
+ if (llvm::StringRef(outputNames[i]).ends_with(".bin")) {
+ char *begin = reinterpret_cast<char *>(outputs[i]);
+ return SmallVector<char, 0>(begin, begin + outputLengths[i]);
+ }
+ }
+ return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+}
+#endif // MLIR_XEVM_OCLOC_LIB_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Compile by spawning the `ocloc` command-line tool as a process,
+// communicating through temporary files. Acts as a fallback when the shared
+// library is not available.
+// ----------------------------------------------------------------------------
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaOclocTool(StringRef asmStr,
+ StringRef inputFormat) {
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
- // Find the `ocloc` tool.
- std::optional<std::string> oclocCompiler = findTool("ocloc");
- if (!oclocCompiler)
+
+ // Locate the `ocloc` executable on PATH.
+ std::optional<std::string> oclocPath = findTool("ocloc");
+ if (!oclocPath) {
+ emitError(getGPUModuleOp().getLoc()) << "Could not find `ocloc` on PATH";
return failure();
+ }
+
Location loc = getGPUModuleOp().getLoc();
std::string basename = llvm::formatv(
"mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
getTarget().getTriple(), getTarget().getChip());
+ // Helper: create a named temporary file, returning path + auto-remover.
auto createTemp = [&](StringRef name,
- StringRef suffix) -> FailureOr<TmpFile> {
- llvm::SmallString<128> filePath;
- if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, filePath))
- return getGPUModuleOp().emitError()
- << "Couldn't create the temp file: `" << filePath
- << "`, error message: " << ec.message();
-
- return TmpFile(filePath, llvm::FileRemover(filePath.c_str()));
+ StringRef suffix) -> std::optional<TmpFile> {
+ llvm::SmallString<128> path;
+ if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, path)) {
+ emitError(loc) << "Couldn't create temp file `" << path
+ << "`: " << ec.message();
+ return std::nullopt;
+ }
+ return TmpFile(path, llvm::FileRemover(path.c_str()));
};
- // Create temp file
- FailureOr<TmpFile> asmFile = createTemp(basename, "asm");
- FailureOr<TmpFile> binFile = createTemp(basename, "");
- FailureOr<TmpFile> logFile = createTemp(basename, "log");
- if (failed(logFile) || failed(asmFile) || failed(binFile))
+
+ std::optional<TmpFile> asmFile = createTemp(basename, "asm");
+ std::optional<TmpFile> binFile = createTemp(basename, "");
+ std::optional<TmpFile> logFile = createTemp(basename, "log");
+ if (!asmFile || !binFile || !logFile)
return failure();
- // Dump the assembly to a temp file
- std::error_code ec;
+
+ // Write the assembly source to a temp file.
{
+ std::error_code ec;
llvm::raw_fd_ostream asmStream(asmFile->first, ec);
- if (ec)
- return emitError(loc) << "Couldn't open the file: `" << asmFile->first
- << "`, error message: " << ec.message();
-
+ if (ec) {
+ emitError(loc) << "Couldn't open `" << asmFile->first
+ << "`: " << ec.message();
+ return failure();
+ }
asmStream << asmStr;
- if (asmStream.has_error())
- return emitError(loc)
- << "An error occurred while writing the assembly to: `"
- << asmFile->first << "`.";
-
+ if (asmStream.has_error()) {
+ emitError(loc) << "Error writing assembly to `" << asmFile->first << "`";
+ return failure();
+ }
asmStream.flush();
}
- // Set cmd options
+
+ // Build command-line options.
std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
targetOptions.tokenizeCmdOptions();
- // Example: --gpu-module-to-binary="opts='opt1 opt2'"
const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+
SmallVector<StringRef, 12> oclocArgs(
{"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
"-options", cmdOptsStr});
-// Dump tool invocation commands.
+ // Dump tool invocation commands.
#define DEBUG_TYPE "serialize-to-binary"
LLVM_DEBUG({
llvm::dbgs() << "Tool invocation for module: "
@@ -170,43 +354,52 @@ FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
llvm::dbgs() << "\n";
});
#undef DEBUG_TYPE
- // Helper function for printing tool error logs.
- std::string message;
- auto emitLogError =
- [&](StringRef toolName) -> FailureOr<SmallVector<char, 0>> {
- if (message.empty()) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
- llvm::MemoryBuffer::getFile(logFile->first);
- if (toolStderr)
- return emitError(loc) << toolName << " invocation failed. Log:\n"
- << toolStderr->get()->getBuffer();
- else
- return emitError(loc) << toolName << " invocation failed.";
+
+ // Redirect stdout/stderr to the log temp file.
+ std::optional<StringRef> redirects[] = {std::nullopt, logFile->first,
+ logFile->first};
+
+ std::string errorMsg;
+ if (llvm::sys::ExecuteAndWait(*oclocPath, oclocArgs, std::nullopt, redirects,
+ 0, 0, &errorMsg)) {
+ // Prefer a structured error message; otherwise dump the log file.
+ if (!errorMsg.empty()) {
+ emitError(loc) << "`ocloc` invocation failed: " << errorMsg;
+ } else if (auto log = llvm::MemoryBuffer::getFile(logFile->first)) {
+ emitError(loc) << "`ocloc` invocation failed. Log:\n"
+ << (*log)->getBuffer();
+ } else {
+ emitError(loc) << "`ocloc` invocation failed (no log available)";
}
- return emitError(loc) << toolName
- << " invocation failed, error message: " << message;
- };
- std::optional<StringRef> redirects[] = {
- std::nullopt,
- logFile->first,
- logFile->first,
- };
- // Invoke ocloc.
- if (llvm::sys::ExecuteAndWait(oclocCompiler.value(), oclocArgs, std::nullopt,
- redirects, 0, 0, &message))
- return emitLogError("`ocloc`");
- binFile->first.append(".bin");
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
- llvm::MemoryBuffer::getFile(binFile->first);
- if (!binaryBuffer)
- return emitError(loc) << "Couldn't open the file: `" << binFile->first
- << "`, error message: "
- << binaryBuffer.getError().message();
+ return failure();
+ }
+ // Read back the binary output (ocloc appends ".bin" to the base name).
+ binFile->first.append(".bin");
+ auto binaryBuffer = llvm::MemoryBuffer::getFile(binFile->first);
+ if (!binaryBuffer) {
+ emitError(loc) << "Couldn't open binary output `" << binFile->first
+ << "`: " << binaryBuffer.getError().message();
+ return failure();
+ }
StringRef bin = (*binaryBuffer)->getBuffer();
return SmallVector<char, 0>(bin.begin(), bin.end());
}
+// ----------------------------------------------------------------------------
+// Public entry-point: prefer the in-process library path; fall back to the
+// external tool when the library is not available.
+// ----------------------------------------------------------------------------
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+ StringRef inputFormat) {
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+ return compileToBinaryViaLibocloc(asmStr, inputFormat);
+#else
+ return compileToBinaryViaOclocTool(asmStr, inputFormat);
+#endif
+}
+
std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
// 1. Check the toolkit path given in the command line.
StringRef pathRef = targetOptions.getToolkitPath();
@@ -228,7 +421,6 @@ std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
"path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
return std::nullopt;
}
-
namespace {
class SPIRVSerializer : public SerializeGPUModuleBase {
public:
@@ -286,6 +478,10 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
} // namespace llvm
#endif
+// There is 1 way to finalize IL to native code: IGC
+// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
+// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
+// - `ocloc` tool can be "queried" from within MLIR.
FailureOr<SmallVector<char, 0>>
SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
#define DEBUG_TYPE "serialize-to-llvm"
More information about the Mlir-commits
mailing list