[Mlir-commits] [mlir] 7b4c9bb - [mlir][XeVM] Use libocloc API for binary generation. (#188353)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Apr 13 08:44:30 PDT 2026


Author: Md Abdullah Shahneous Bari
Date: 2026-04-13T10:44:26-05:00
New Revision: 7b4c9bb2069536e0df18597795b858e18a1cbaaf

URL: https://github.com/llvm/llvm-project/commit/7b4c9bb2069536e0df18597795b858e18a1cbaaf
DIFF: https://github.com/llvm/llvm-project/commit/7b4c9bb2069536e0df18597795b858e18a1cbaaf.diff

LOG: [mlir][XeVM] Use libocloc API for binary generation. (#188353)

This PR improves native binary generation by avoiding the
`llvm::sys::ExecuteAndWait` call for ocloc and instead
leveraging `oclocInvoke()`, which consumes an in-memory SPIR-V string.

Co-authored-by: Artem Kroviakov <artem.kroviakov at intel.com>

Added: 
    mlir/cmake/modules/Findocloc.cmake

Modified: 
    mlir/include/mlir/Target/LLVM/XeVM/Utils.h
    mlir/lib/Target/LLVM/CMakeLists.txt
    mlir/lib/Target/LLVM/XeVM/Target.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/cmake/modules/Findocloc.cmake b/mlir/cmake/modules/Findocloc.cmake
new file mode 100644
index 0000000000000..5b433ff1fe5e4
--- /dev/null
+++ b/mlir/cmake/modules/Findocloc.cmake
@@ -0,0 +1,94 @@
+# Findocloc.cmake
+#
+# Locates the Intel `ocloc` offline compiler: its executable and its library.
+#
+# Input:
+#   OCLOC_PACKAGE_DIR - optional install prefix; searched before default paths.
+#
+# Results (cache variables):
+#   OCLOC_EXECUTABLE  - path to the ocloc executable.
+#   OCLOC_LIBRARY     - path to the ocloc library.
+#   OCLOC_FOUND       - TRUE only if both the executable and library were found.
+#
+# Imported targets (each created only when its artifact was found):
+#   ocloc             - the executable.
+#   libocloc          - the shared library.
+
+function(find_ocloc)
+    message(STATUS "Searching for ocloc")
+
+    if(WIN32)
+        if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+            set(OCLOC_SUFFIX "64")
+        else()
+            set(OCLOC_SUFFIX "32")
+        endif()
+
+        set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/bin")
+        set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/lib")
+    else()
+        set(OCLOC_SUFFIX "")
+
+        set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}/bin")
+        set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}/lib")
+
+        if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+            list(APPEND OCLOC_LIB_PATHS
+                "${OCLOC_PACKAGE_DIR}/lib64"
+                "${OCLOC_PACKAGE_DIR}/lib/x86_64-linux-gnu")
+        endif()
+    endif()
+
+    # Search for ocloc executable: first only in the OCLOC_PACKAGE_DIR paths, then
+    # in CMake's default locations too (a no-op if the first call succeeded).
+    find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_EXE_PATHS} NO_DEFAULT_PATH)
+    find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_EXE_PATHS})
+
+    # Search for ocloc library, using the same two-step lookup.
+    find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_LIB_PATHS} NO_DEFAULT_PATH)
+    find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_LIB_PATHS})
+
+    # Check if all components are found
+    if(OCLOC_EXECUTABLE AND OCLOC_LIBRARY)
+        set(OCLOC_FOUND TRUE)
+    else()
+        set(OCLOC_FOUND FALSE)
+    endif()
+
+    # Provide the results to the user
+    if(OCLOC_FOUND)
+        message(STATUS "Found ocloc executable: ${OCLOC_EXECUTABLE}")
+        message(STATUS "Found ocloc library: ${OCLOC_LIBRARY}")
+    else()
+        message(STATUS "ocloc not found")
+    endif()
+
+    # Set the variables for the user. OCLOC_FOUND is recomputed on every
+    # configure, so it is FORCE-written: without FORCE an existing cache entry is
+    # left untouched and a stale FALSE would survive installing ocloc later.
+    set(OCLOC_EXECUTABLE ${OCLOC_EXECUTABLE} CACHE FILEPATH "Path to ocloc executable")
+    set(OCLOC_LIBRARY ${OCLOC_LIBRARY} CACHE FILEPATH "Path to ocloc library")
+    set(OCLOC_FOUND ${OCLOC_FOUND} CACHE BOOL "ocloc found" FORCE)
+
+    # Only create imported targets when the underlying artifact was actually
+    # found, to avoid CMake errors from targets with empty IMPORTED_LOCATION.
+    # Skip targets that already exist so the module can be included repeatedly.
+    if(OCLOC_EXECUTABLE AND NOT TARGET ocloc)
+        add_executable(ocloc IMPORTED)
+        set_property(TARGET ocloc PROPERTY IMPORTED_LOCATION "${OCLOC_EXECUTABLE}")
+    endif()
+
+    if(OCLOC_LIBRARY AND NOT TARGET libocloc)
+        add_library(libocloc SHARED IMPORTED)
+        set_target_properties(libocloc PROPERTIES
+            IMPORTED_LOCATION "${OCLOC_LIBRARY}")
+    endif()
+endfunction()
+
+# Call the function to find ocloc
+find_ocloc()
+

diff  --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 455c8303a9aa8..b5fa0349839ee 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -39,7 +39,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// Returns the gpu module being serialized.
   gpu::GPUModuleOp getGPUModuleOp();
 
-  /// Compiles to native code using `ocloc`.
+  /// Compiles to native code using `ocloc` (API or tool).
   FailureOr<SmallVector<char, 0>> compileToBinary(StringRef asmStr,
                                                   StringRef inputFormat);
 
@@ -54,6 +54,21 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// Returns the path to the tool used for serialization.
   std::optional<std::string> findTool(StringRef tool);
 
+  /// Compiles to native code using the `ocloc` command-line tool, communicating
+  /// through temporary files.
+  FailureOr<SmallVector<char, 0>>
+  compileToBinaryViaOclocTool(StringRef asmStr, StringRef inputFormat);
+
+  /// Compiles to native code using the `ocloc` shared library API, in-process,
+  /// without temporary files.
+  ///
+  /// The declaration is unconditional so that this class definition is the same
+  /// in every translation unit: MLIR_XEVM_OCLOC_LIB_AVAILABLE is only defined
+  /// privately for MLIRXeVMTarget. The function is only defined, and must only
+  /// be called, when that macro is nonzero.
+  FailureOr<SmallVector<char, 0>>
+  compileToBinaryViaLibocloc(StringRef asmStr, StringRef inputFormat);
+
   /// GPU compilation target options.
   gpu::TargetOptions targetOptions;
 };

diff  --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 94660e231888b..a6f1436e1d3a9 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -83,7 +83,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
       add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
       # Downstream projects can modify this path and use it in CMake. For example:
       # add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
-      # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})  
+      # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
       # where `...` is to be replaced with the path to the library.
       set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
       # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
@@ -101,7 +101,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
       endif()
 
       add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
-      set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})  
+      set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
       target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
     endif()
   else()
@@ -141,7 +141,7 @@ if (MLIR_NVVM_EMBED_LIBDEVICE)
               "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
     endif()
   endif()
-  
+
   embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
   add_mlir_library(MLIRNVVMLibdevice
     ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
@@ -234,3 +234,26 @@ add_mlir_dialect_library(MLIRXeVMTarget
   MLIRTargetLLVM
   MLIRXeVMToLLVMIRTranslation
 )
+
+# Optionally link libocloc so native binaries can be generated in-process.
+# Without it, MLIRXeVMTarget spawns the `ocloc` tool at runtime instead.
+find_package(ocloc)
+if (OCLOC_FOUND)
+  # Findocloc.cmake defines no OCLOC_INCLUDE_DIR, so only add an include path
+  # when one was provided externally; an empty entry must not be passed.
+  if (OCLOC_INCLUDE_DIR)
+    target_include_directories(MLIRXeVMTarget PRIVATE "${OCLOC_INCLUDE_DIR}")
+  endif()
+  target_link_libraries(MLIRXeVMTarget PRIVATE "${OCLOC_LIBRARY}")
+  target_compile_definitions(obj.MLIRXeVMTarget
+    PRIVATE
+    MLIR_XEVM_OCLOC_LIB_AVAILABLE=1
+  )
+else()
+  target_compile_definitions(obj.MLIRXeVMTarget
+    PRIVATE
+    MLIR_XEVM_OCLOC_LIB_AVAILABLE=0
+  )
+  message(WARNING "ocloc not found, MLIRXeVMTarget will fall back to invoking the `ocloc` tool at runtime for native binary compilation.")
+endif()
+

diff  --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index f8f09258fd861..28c654f3d7c9a 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Target/LLVM/XeVM/Target.h"
-
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/XeVMDialect.h"
@@ -25,11 +24,10 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/Config/Targets.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -39,6 +37,7 @@
 #include "llvm/Support/Program.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 #include <cstdint>
 #include <cstdlib>
@@ -46,6 +45,96 @@
 using namespace mlir;
 using namespace mlir::xevm;
 
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+// Intel compute runtime includes libocloc in the distribution, but
+// <ocloc_api.h> isn't included. Hence forward declarations for the libocloc
+// shared-library APIs are needed. These replace the inclusion of <ocloc_api.h>
+// so that the header is not a build-time requirement; the symbols are resolved
+// at link/load time via the ocloc shared library.
+extern "C" {
+
+// Return code indicating successful ocloc compilation.
+// Matches the OCLOC_SUCCESS enumerator in the real header (value 0).
+enum OclocErrorCode : int { OCLOC_SUCCESS = 0 };
+
+/// Drives an in-process ocloc compilation.
+/// Invokes ocloc API using C interface. Supported commands match
+/// the functionality of ocloc executable (check ocloc's "help"
+/// for reference : shared/offline_compiler/source/ocloc_api.cpp).
+///
+/// numArgs and argv params represent the command line.
+/// Remaining params represent I/O.
+/// Output params should be freed using oclocFreeOutput when
+/// no longer needed.
+/// List and names of outputs match outputs of ocloc executable.
+///
+/// \param numArgs is the number of arguments to pass to ocloc
+///
+/// \param argv is an array of arguments to be passed to ocloc
+///
+/// \param numSources is the number of in-memory representations
+/// of source files to be passed to ocloc
+///
+/// \param dataSources is an array of in-memory representations
+/// of source files to be passed to ocloc
+///
+/// \param lenSources is an array of sizes of in-memory representations
+/// of source files passed to ocloc as dataSources
+///
+/// \param nameSources is an array of names of in-memory representations
+/// of source files passed to ocloc as dataSources
+///
+/// \param numInputHeaders is the number of in-memory representations
+/// of header files to be passed to ocloc
+///
+/// \param dataInputHeaders is an array of in-memory representations
+/// of header files to be passed to ocloc
+///
+/// \param lenInputHeaders is an array of sizes of in-memory representations
+/// of header files passed to ocloc as dataInputHeaders
+///
+/// \param nameInputHeaders is an array of names of in-memory representations
+/// of header files passed to ocloc as dataInputHeaders
+///
+/// \param numOutputs returns the number of outputs
+///
+/// \param dataOutputs returns an array of in-memory representations
+/// of output files
+///
+/// \param lenOutputs returns an array of sizes of in-memory representations
+/// of output files
+///
+/// \param nameOutputs returns an array of names of in-memory representations
+/// of output files. Special name stdout.log describes output that contains
+/// messages generated by ocloc (e.g. compiler errors/warnings)
+///
+/// \returns 0 on success. Returns non-0 in case of failure.
+
+int oclocInvoke(uint32_t numArgs, const char **argv, uint32_t numSources,
+                const uint8_t **dataSources, const uint64_t *lenSources,
+                const char **nameSources, uint32_t numInputHeaders,
+                const uint8_t **dataInputHeaders,
+                const uint64_t *lenInputHeaders, const char **nameInputHeaders,
+                uint32_t *numOutputs, uint8_t ***dataOutputs,
+                uint64_t **lenOutputs, char ***nameOutputs);
+
+/// Frees results of oclocInvoke
+///
+/// \param numOutputs is number of outputs as returned by oclocInvoke
+///
+/// \param dataOutputs is array of outputs as returned by oclocInvoke
+///
+/// \param lenOutputs is array of sizes of outputs as returned by oclocInvoke
+///
+/// \param nameOutputs is array of names of outputs as returned by oclocInvoke
+///
+/// \returns 0 on success. Returns non-0 in case of failure.
+int oclocFreeOutput(uint32_t *numOutputs, uint8_t ***dataOutputs,
+                    uint64_t **lenOutputs, char ***nameOutputs);
+
+} // extern "C"
+#endif // MLIR_XEVM_OCLOC_LIB_AVAILABLE
+
 namespace {
 // XeVM implementation of the gpu:TargetAttrInterface.
 class XeVMTargetAttrImpl
@@ -103,65 +192,160 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
   return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
 }
 
-// There is 1 way to finalize IL to native code: IGC
-// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
-// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
-// - `ocloc` tool can be "queried" from within MLIR.
-FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
-    StringRef asmStr, StringRef inputFormat = "-spirv_input") {
+// ----------------------------------------------------------------------------
+// Compile in-process via the ocloc library API (no temp files); yields the
+// `.bin` output. Only compiled when the library is available at build time.
+// ----------------------------------------------------------------------------
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
+                                                   StringRef inputFormat) {
+  Location loc = getGPUModuleOp().getLoc();
+  std::string asmFname = llvm::formatv(
+      "mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
+      getTarget().getTriple(), getTarget().getChip());
+
+  // Tokenize the user options; they are re-joined into one quoted argument.
+  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+      targetOptions.tokenizeCmdOptions();
+  // Example: --gpu-module-to-binary="opts='opt1 opt2'"
+  const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+  std::vector<std::string> oclocArgs = {"ocloc",
+                                        "compile",
+                                        "-file",
+                                        asmFname,
+                                        inputFormat.str(),
+                                        "-device",
+                                        getTarget().getChip().str(),
+                                        "-options",
+                                        cmdOptsStr};
+
+  // Dump the libocloc invocation when debug output is enabled.
+#define DEBUG_TYPE "serialize-to-binary"
+  LLVM_DEBUG({
+    llvm::dbgs() << "libocloc invocation for module: "
+                 << getGPUModuleOp().getNameAttr() << "\n";
+    llvm::interleave(oclocArgs, llvm::dbgs(), " ");
+    llvm::dbgs() << "\n";
+  });
+#undef DEBUG_TYPE
+
+  // Plain argv for oclocInvoke; the pointers borrow from `oclocArgs`.
+  std::vector<const char *> argv;
+  for (const auto &str : oclocArgs)
+    argv.push_back(str.c_str());
+
+  // Pass `asmStr` as the single in-memory source, named `asmFname`.
+  const uint8_t *dataSources[1] = {
+      reinterpret_cast<const uint8_t *>(asmStr.data())};
+  const uint64_t lenSources[1] = {asmStr.size()};
+  const char *nameSources[1] = {asmFname.c_str()};
+
+  uint32_t outputsNum = 0;
+  uint8_t **outputs = nullptr;
+  uint64_t *outputLengths = nullptr;
+  char **outputNames = nullptr;
+  // Hand the output buffers back to ocloc on every exit path below.
+  auto freeOutputs = llvm::scope_exit([&]() {
+    oclocFreeOutput(&outputsNum, &outputs, &outputLengths, &outputNames);
+  });
+
+  int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
+                        /*numSources=*/1, dataSources, lenSources, nameSources,
+                        /*numHeaders=*/0, nullptr, nullptr, nullptr,
+                        &outputsNum, &outputs, &outputLengths, &outputNames);
+
+  if (err != OCLOC_SUCCESS) {
+    emitError(loc) << "libocloc: `oclocInvoke` failed, error code: " << err;
+    // Emit any compiler log that ocloc produced.
+    for (uint32_t i = 0; i < outputsNum; ++i) {
+      if (llvm::StringRef(outputNames[i]).ends_with(".log"))
+        emitError(loc) << "Compiler log:\n"
+                       << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+                                          outputLengths[i]);
+    }
+    return failure();
+  }
+
+  // Find and return the .bin output.
+  for (uint32_t i = 0; i < outputsNum; ++i) {
+    if (llvm::StringRef(outputNames[i]).ends_with(".bin")) {
+      char *begin = reinterpret_cast<char *>(outputs[i]);
+      return SmallVector<char, 0>(begin, begin + outputLengths[i]);
+    }
+  }
+  return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+}
+#endif // MLIR_XEVM_OCLOC_LIB_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Compile by spawning the `ocloc` command-line tool as a process,
+// communicating through temporary files.  Acts as a fallback when the shared
+// library is not available.
+// ----------------------------------------------------------------------------
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaOclocTool(StringRef asmStr,
+                                                    StringRef inputFormat) {
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
-  // Find the `ocloc` tool.
-  std::optional<std::string> oclocCompiler = findTool("ocloc");
-  if (!oclocCompiler)
+
+  // Locate the `ocloc` executable on PATH.
+  std::optional<std::string> oclocPath = findTool("ocloc");
+  if (!oclocPath) {
+    emitError(getGPUModuleOp().getLoc()) << "Could not find `ocloc` on PATH";
     return failure();
+  }
+
   Location loc = getGPUModuleOp().getLoc();
   std::string basename = llvm::formatv(
       "mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
       getTarget().getTriple(), getTarget().getChip());
 
+  // Helper: create a named temporary file, returning path + auto-remover.
   auto createTemp = [&](StringRef name,
-                        StringRef suffix) -> FailureOr<TmpFile> {
-    llvm::SmallString<128> filePath;
-    if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, filePath))
-      return getGPUModuleOp().emitError()
-             << "Couldn't create the temp file: `" << filePath
-             << "`, error message: " << ec.message();
-
-    return TmpFile(filePath, llvm::FileRemover(filePath.c_str()));
+                        StringRef suffix) -> std::optional<TmpFile> {
+    llvm::SmallString<128> path;
+    if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, path)) {
+      emitError(loc) << "Couldn't create temp file `" << path
+                     << "`: " << ec.message();
+      return std::nullopt;
+    }
+    return TmpFile(path, llvm::FileRemover(path.c_str()));
   };
-  // Create temp file
-  FailureOr<TmpFile> asmFile = createTemp(basename, "asm");
-  FailureOr<TmpFile> binFile = createTemp(basename, "");
-  FailureOr<TmpFile> logFile = createTemp(basename, "log");
-  if (failed(logFile) || failed(asmFile) || failed(binFile))
+
+  std::optional<TmpFile> asmFile = createTemp(basename, "asm");
+  std::optional<TmpFile> binFile = createTemp(basename, "");
+  std::optional<TmpFile> logFile = createTemp(basename, "log");
+  if (!asmFile || !binFile || !logFile)
     return failure();
-  // Dump the assembly to a temp file
-  std::error_code ec;
+  // Write the assembly to a temp file; flush before checking for write errors.
   {
+    std::error_code ec;
     llvm::raw_fd_ostream asmStream(asmFile->first, ec);
-    if (ec)
-      return emitError(loc) << "Couldn't open the file: `" << asmFile->first
-                            << "`, error message: " << ec.message();
-
+    if (ec) {
+      emitError(loc) << "Couldn't open `" << asmFile->first
+                     << "`: " << ec.message();
+      return failure();
+    }
     asmStream << asmStr;
-    if (asmStream.has_error())
-      return emitError(loc)
-             << "An error occurred while writing the assembly to: `"
-             << asmFile->first << "`.";
-
     asmStream.flush();
+    if (asmStream.has_error()) {
+      asmStream.clear_error(); // Otherwise the destructor aborts.
+      emitError(loc) << "Error writing assembly to `" << asmFile->first << "`";
+      return failure();
+    }
   }
-  // Set cmd options
+
+  // Build command-line options.
   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
       targetOptions.tokenizeCmdOptions();
-  // Example: --gpu-module-to-binary="opts='opt1 opt2'"
   const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+
   SmallVector<StringRef, 12> oclocArgs(
       {"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
        getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
        "-options", cmdOptsStr});
 
-// Dump tool invocation commands.
+  // Dump tool invocation commands.
 #define DEBUG_TYPE "serialize-to-binary"
   LLVM_DEBUG({
     llvm::dbgs() << "Tool invocation for module: "
@@ -170,43 +354,52 @@ FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
     llvm::dbgs() << "\n";
   });
 #undef DEBUG_TYPE
-  // Helper function for printing tool error logs.
-  std::string message;
-  auto emitLogError =
-      [&](StringRef toolName) -> FailureOr<SmallVector<char, 0>> {
-    if (message.empty()) {
-      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
-          llvm::MemoryBuffer::getFile(logFile->first);
-      if (toolStderr)
-        return emitError(loc) << toolName << " invocation failed. Log:\n"
-                              << toolStderr->get()->getBuffer();
-      else
-        return emitError(loc) << toolName << " invocation failed.";
+
+  // Redirect stdout/stderr to the log temp file.
+  std::optional<StringRef> redirects[] = {std::nullopt, logFile->first,
+                                          logFile->first};
+
+  std::string errorMsg;
+  if (llvm::sys::ExecuteAndWait(*oclocPath, oclocArgs, std::nullopt, redirects,
+                                0, 0, &errorMsg)) {
+    // Prefer a structured error message; otherwise dump the log file.
+    if (!errorMsg.empty()) {
+      emitError(loc) << "`ocloc` invocation failed: " << errorMsg;
+    } else if (auto log = llvm::MemoryBuffer::getFile(logFile->first)) {
+      emitError(loc) << "`ocloc` invocation failed. Log:\n"
+                     << (*log)->getBuffer();
+    } else {
+      emitError(loc) << "`ocloc` invocation failed (no log available)";
     }
-    return emitError(loc) << toolName
-                          << " invocation failed, error message: " << message;
-  };
-  std::optional<StringRef> redirects[] = {
-      std::nullopt,
-      logFile->first,
-      logFile->first,
-  };
-  // Invoke ocloc.
-  if (llvm::sys::ExecuteAndWait(oclocCompiler.value(), oclocArgs, std::nullopt,
-                                redirects, 0, 0, &message))
-    return emitLogError("`ocloc`");
-  binFile->first.append(".bin");
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
-      llvm::MemoryBuffer::getFile(binFile->first);
-  if (!binaryBuffer)
-    return emitError(loc) << "Couldn't open the file: `" << binFile->first
-                          << "`, error message: "
-                          << binaryBuffer.getError().message();
+    return failure();
+  }
 
+  binFile->first.append(".bin"); // ocloc appends ".bin" to the base name.
+  llvm::FileRemover binRemover(binFile->first); // Clean up ocloc's output too.
+  auto binaryBuffer = llvm::MemoryBuffer::getFile(binFile->first);
+  if (!binaryBuffer) {
+    emitError(loc) << "Couldn't open binary output `" << binFile->first
+                   << "`: " << binaryBuffer.getError().message();
+    return failure();
+  }
   StringRef bin = (*binaryBuffer)->getBuffer();
   return SmallVector<char, 0>(bin.begin(), bin.end());
 }
 
+// ----------------------------------------------------------------------------
+// Entry point. The backend is fixed at build time: the in-process library when
+// MLIR_XEVM_OCLOC_LIB_AVAILABLE is nonzero, else the external `ocloc` tool.
+// ----------------------------------------------------------------------------
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+                                        StringRef inputFormat) {
+#if MLIR_XEVM_OCLOC_LIB_AVAILABLE
+  return compileToBinaryViaLibocloc(asmStr, inputFormat);
+#else
+  return compileToBinaryViaOclocTool(asmStr, inputFormat);
+#endif
+}
+
 std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
   // 1. Check the toolkit path given in the command line.
   StringRef pathRef = targetOptions.getToolkitPath();
@@ -228,7 +421,6 @@ std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
          "path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
   return std::nullopt;
 }
-
 namespace {
 class SPIRVSerializer : public SerializeGPUModuleBase {
 public:
@@ -286,6 +478,10 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
 } // namespace llvm
 #endif
 
+// There is 1 way to finalize IL to native code: IGC
+// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
+// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
+// - `ocloc` tool can be "queried" from within MLIR.
 FailureOr<SmallVector<char, 0>>
 SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
 #define DEBUG_TYPE "serialize-to-llvm"


        


More information about the Mlir-commits mailing list