[Mlir-commits] [mlir] [mlir][XeVM] Use libocloc API for binary generation. (PR #188353)

Md Abdullah Shahneous Bari llvmlistbot at llvm.org
Thu Apr 9 10:59:50 PDT 2026


https://github.com/mshahneo updated https://github.com/llvm/llvm-project/pull/188353

>From 67e7eb61eb16d0416efd3c35a31eae10eb70baab Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Tue, 24 Mar 2026 21:04:35 +0000
Subject: [PATCH 1/5] [mlir][XeVM] Use libocloc API for binary generation.

This PR improves native binary generation by avoiding the
`llvm::sys::ExecuteAndWait` call for ocloc and instead using
`oclocInvoke()`, which consumes an in-memory SPIR-V string.
---
 mlir/cmake/modules/Findocloc.cmake         |  82 ++++++++++
 mlir/include/mlir/Target/LLVM/XeVM/Utils.h |   3 -
 mlir/lib/Target/LLVM/CMakeLists.txt        |  23 ++-
 mlir/lib/Target/LLVM/XeVM/Target.cpp       | 182 +++++++++------------
 4 files changed, 178 insertions(+), 112 deletions(-)
 create mode 100644 mlir/cmake/modules/Findocloc.cmake

diff --git a/mlir/cmake/modules/Findocloc.cmake b/mlir/cmake/modules/Findocloc.cmake
new file mode 100644
index 0000000000000..7209c145bd762
--- /dev/null
+++ b/mlir/cmake/modules/Findocloc.cmake
@@ -0,0 +1,82 @@
+# Findocloc.cmake
+
+# Define a function to search for ocloc
+function(find_ocloc)
+    message(STATUS "Searching for ocloc")
+
+    if(WIN32)
+        if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+            set(OCLOC_SUFFIX "64")
+        else()
+            set(OCLOC_SUFFIX "32")
+        endif()
+
+        set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/bin")
+        set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/lib")
+        set(OCLOC_INC_PATHS "${OCLOC_PACKAGE_DIR}" "${OCLOC_PACKAGE_DIR}/include")
+    else()
+        set(OCLOC_SUFFIX "")
+
+        set(OCLOC_EXE_PATHS "${OCLOC_PACKAGE_DIR}/bin")
+        set(OCLOC_LIB_PATHS "${OCLOC_PACKAGE_DIR}/lib")
+        set(OCLOC_INC_PATHS "${OCLOC_PACKAGE_DIR}/include")
+
+        if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+            list(APPEND OCLOC_LIB_PATHS
+                "${OCLOC_PACKAGE_DIR}/lib64"
+                "${OCLOC_PACKAGE_DIR}/lib/x86_64-linux-gnu")
+        endif()
+    endif()
+
+    # Search for ocloc executable
+    find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_EXE_PATHS} NO_DEFAULT_PATH)
+    find_program(OCLOC_EXECUTABLE NAMES "ocloc" "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_EXE_PATHS})
+
+    # Search for ocloc library
+    find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_LIB_PATHS} NO_DEFAULT_PATH)
+    find_library(OCLOC_LIBRARY NAMES "ocloc${OCLOC_SUFFIX}"
+        PATHS ${OCLOC_LIB_PATHS})
+
+    # Search for ocloc_api.h header file
+    find_path(OCLOC_INCLUDE_DIR NAMES ocloc_api.h
+        PATHS ${OCLOC_INC_PATHS} NO_DEFAULT_PATH)
+    find_path(OCLOC_INCLUDE_DIR NAMES ocloc_api.h
+        PATHS ${OCLOC_INC_PATHS})
+
+    # Check if all components are found
+    if(OCLOC_EXECUTABLE AND OCLOC_LIBRARY AND OCLOC_INCLUDE_DIR)
+        set(OCLOC_FOUND TRUE)
+    else()
+        set(OCLOC_FOUND FALSE)
+    endif()
+
+    # Provide the results to the user
+    if(OCLOC_FOUND)
+        message(STATUS "Found ocloc executable: ${OCLOC_EXECUTABLE}")
+        message(STATUS "Found ocloc library: ${OCLOC_LIBRARY}")
+        message(STATUS "Found ocloc_api.h: ${OCLOC_INCLUDE_DIR}")
+    else()
+        message(STATUS "ocloc not found")
+    endif()
+
+    # Set the variables for the user
+    set(OCLOC_EXECUTABLE ${OCLOC_EXECUTABLE} CACHE FILEPATH "Path to ocloc executable")
+    set(OCLOC_LIBRARY ${OCLOC_LIBRARY} CACHE FILEPATH "Path to ocloc library")
+    set(OCLOC_INCLUDE_DIR ${OCLOC_INCLUDE_DIR} CACHE PATH "Path to ocloc_api.h header file")
+    set(OCLOC_FOUND ${OCLOC_FOUND} CACHE BOOL "ocloc found")
+
+    add_executable(ocloc IMPORTED)
+    set_property(TARGET ocloc PROPERTY IMPORTED_LOCATION "${OCLOC_EXECUTABLE}")
+
+    add_library(libocloc SHARED IMPORTED)
+    set_target_properties(libocloc PROPERTIES
+        IMPORTED_LOCATION "${OCLOC_LIBRARY}"
+        INTERFACE_INCLUDE_DIRECTORIES "${OCLOC_INCLUDE_DIR}")
+endfunction()
+
+# Call the function to find ocloc
+find_ocloc()
+
diff --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 455c8303a9aa8..02e766ff71cf5 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -51,9 +51,6 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// a Resource blob pointing to the LLVM bitcode in-memory.
   SmallVector<Attribute> librariesToLink;
 
-  /// Returns the path to the tool used for serialization.
-  std::optional<std::string> findTool(StringRef tool);
-
   /// GPU compilation target options.
   gpu::TargetOptions targetOptions;
 };
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 94660e231888b..1d671027c6d34 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -83,7 +83,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
       add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
       # Downstream projects can modify this path and use it in CMake. For example:
       # add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
-      # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})  
+      # set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
       # where `...` is to be replaced with the path to the library.
       set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
       # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
@@ -101,7 +101,7 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
       endif()
 
       add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
-      set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})  
+      set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
       target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
     endif()
   else()
@@ -141,7 +141,7 @@ if (MLIR_NVVM_EMBED_LIBDEVICE)
               "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
     endif()
   endif()
-  
+
   embed_binary_to_src(${MLIR_NVVM_LIBDEVICE_PATH} ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c _mlir_embedded_libdevice)
   add_mlir_library(MLIRNVVMLibdevice
     ${CMAKE_CURRENT_BINARY_DIR}/libdevice_embedded.c
@@ -234,3 +234,20 @@ add_mlir_dialect_library(MLIRXeVMTarget
   MLIRTargetLLVM
   MLIRXeVMToLLVMIRTranslation
 )
+
+find_package(ocloc)
+if (OCLOC_FOUND)
+  target_include_directories(MLIRXeVMTarget PRIVATE "${OCLOC_INCLUDE_DIR}")
+  target_link_libraries(MLIRXeVMTarget PRIVATE "${OCLOC_LIBRARY}")
+  target_compile_definitions(obj.MLIRXeVMTarget
+    PRIVATE
+    MLIR_XEVM_OCLOC_AVAILABLE=1
+  )
+else()
+  target_compile_definitions(obj.MLIRXeVMTarget
+    PRIVATE
+    MLIR_XEVM_OCLOC_AVAILABLE=0
+  )
+  message(WARNING "ocloc not found, MLIRXeVMTarget will not be able to use ocloc for native binary compilation.")
+endif()
+
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 83eec5e9d5549..ffdd4a26f4ecc 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -28,18 +28,17 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Target/TargetMachine.h"
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/Config/Targets.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 
+#if MLIR_XEVM_OCLOC_AVAILABLE
+#include <ocloc_api.h>
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
 #include <cstdint>
 #include <cstdlib>
 
@@ -107,59 +106,28 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
 // There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
 // - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
 // - `ocloc` tool can be "queried" from within MLIR.
-FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
-    StringRef asmStr, StringRef inputFormat = "-spirv_input") {
-  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
-  // Find the `ocloc` tool.
-  std::optional<std::string> oclocCompiler = findTool("ocloc");
-  if (!oclocCompiler)
-    return failure();
+#if MLIR_XEVM_OCLOC_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+                                        StringRef inputFormat) {
   Location loc = getGPUModuleOp().getLoc();
-  std::string basename = llvm::formatv(
-      "mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
+  std::string asmFname = llvm::formatv(
+      "mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
       getTarget().getTriple(), getTarget().getChip());
-
-  auto createTemp = [&](StringRef name,
-                        StringRef suffix) -> FailureOr<TmpFile> {
-    llvm::SmallString<128> filePath;
-    if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, filePath))
-      return getGPUModuleOp().emitError()
-             << "Couldn't create the temp file: `" << filePath
-             << "`, error message: " << ec.message();
-
-    return TmpFile(filePath, llvm::FileRemover(filePath.c_str()));
-  };
-  // Create temp file
-  FailureOr<TmpFile> asmFile = createTemp(basename, "asm");
-  FailureOr<TmpFile> binFile = createTemp(basename, "");
-  FailureOr<TmpFile> logFile = createTemp(basename, "log");
-  if (failed(logFile) || failed(asmFile) || failed(binFile))
-    return failure();
-  // Dump the assembly to a temp file
-  std::error_code ec;
-  {
-    llvm::raw_fd_ostream asmStream(asmFile->first, ec);
-    if (ec)
-      return emitError(loc) << "Couldn't open the file: `" << asmFile->first
-                            << "`, error message: " << ec.message();
-
-    asmStream << asmStr;
-    if (asmStream.has_error())
-      return emitError(loc)
-             << "An error occurred while writing the assembly to: `"
-             << asmFile->first << "`.";
-
-    asmStream.flush();
-  }
   // Set cmd options
   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
       targetOptions.tokenizeCmdOptions();
   // Example: --gpu-module-to-binary="opts='opt1 opt2'"
   const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
-  SmallVector<StringRef, 12> oclocArgs(
-      {"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
-       getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
-       "-options", cmdOptsStr});
+  std::vector<std::string> oclocArgs = {"ocloc",
+                                        "compile",
+                                        "-file",
+                                        asmFname,
+                                        inputFormat.str(),
+                                        "-device",
+                                        getTarget().getChip().str(),
+                                        "-options",
+                                        cmdOptsStr};
 
 // Dump tool invocation commands.
 #define DEBUG_TYPE "serialize-to-binary"
@@ -170,64 +138,66 @@ FailureOr<SmallVector<char, 0>> SerializeGPUModuleBase::compileToBinary(
     llvm::dbgs() << "\n";
   });
 #undef DEBUG_TYPE
-  // Helper function for printing tool error logs.
-  std::string message;
-  auto emitLogError =
-      [&](StringRef toolName) -> FailureOr<SmallVector<char, 0>> {
-    if (message.empty()) {
-      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
-          llvm::MemoryBuffer::getFile(logFile->first);
-      if (toolStderr)
-        return emitError(loc) << toolName << " invocation failed. Log:\n"
-                              << toolStderr->get()->getBuffer();
-      else
-        return emitError(loc) << toolName << " invocation failed.";
+
+  std::vector<const char *> argv;
+  for (const auto &str : oclocArgs)
+    argv.push_back(str.c_str());
+
+  uint32_t numSources = 1;
+  const uint8_t *dataSources[1] = {
+      reinterpret_cast<const uint8_t *>(asmStr.data())};
+  const uint64_t lenSources[1] = {asmStr.size()};
+  const char *nameSources[1] = {asmFname.c_str()};
+
+  uint32_t outputs_num = 0;
+  uint8_t **outputs = nullptr;
+  uint64_t *output_length = nullptr;
+  char **output_names = nullptr;
+  auto _ = llvm::scope_exit([&]() {
+    oclocFreeOutput(&outputs_num, &outputs, &output_length, &output_names);
+  });
+
+  int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
+                        numSources, dataSources, lenSources, nameSources, 0,
+                        nullptr, nullptr, nullptr, &outputs_num, &outputs,
+                        &output_length, &output_names);
+
+  if (err != OCLOC_SUCCESS) {
+    emitError(loc) << "`oclocInvoke` failed or produced no output, error: "
+                   << err;
+    for (uint32_t i = 0; i < outputs_num; ++i) {
+      if (llvm::StringRef(output_names[i]).ends_with(".log")) {
+        emitError(loc) << "Compiler log:\n";
+        emitError(loc) << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+                                          output_length[i])
+                       << "\n";
+      }
     }
-    return emitError(loc) << toolName
-                          << " invocation failed, error message: " << message;
-  };
-  std::optional<StringRef> redirects[] = {
-      std::nullopt,
-      logFile->first,
-      logFile->first,
-  };
-  // Invoke ocloc.
-  if (llvm::sys::ExecuteAndWait(oclocCompiler.value(), oclocArgs, std::nullopt,
-                                redirects, 0, 0, &message))
-    return emitLogError("`ocloc`");
-  binFile->first.append(".bin");
-  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
-      llvm::MemoryBuffer::getFile(binFile->first);
-  if (!binaryBuffer)
-    return emitError(loc) << "Couldn't open the file: `" << binFile->first
-                          << "`, error message: "
-                          << binaryBuffer.getError().message();
-
-  StringRef bin = (*binaryBuffer)->getBuffer();
-  return SmallVector<char, 0>(bin.begin(), bin.end());
-}
+    return failure();
+  }
 
-std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
-  // 1. Check the toolkit path given in the command line.
-  StringRef pathRef = targetOptions.getToolkitPath();
-  SmallVector<char, 256> path;
-  if (!pathRef.empty()) {
-    path.insert(path.begin(), pathRef.begin(), pathRef.end());
-    llvm::sys::path::append(path, "bin", tool);
-    if (llvm::sys::fs::can_execute(path))
-      return StringRef(path.data(), path.size()).str();
+  SmallVector<char, 0> binStr;
+  for (uint32_t i = 0; i < outputs_num; ++i) {
+    if (llvm::StringRef(output_names[i]).ends_with(".bin")) {
+      char *outBegin = reinterpret_cast<char *>(outputs[i]);
+      char *outEnd = outBegin + output_length[i];
+      binStr.assign(outBegin, outEnd);
+      break;
+    }
   }
-  // 2. Check PATH.
-  if (std::optional<std::string> toolPath =
-          llvm::sys::Process::FindInEnvPath("PATH", tool))
-    return *toolPath;
-
-  getGPUModuleOp().emitError()
-      << "Couldn't find the `" << tool
-      << "` binary. Please specify the toolkit "
-         "path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
-  return std::nullopt;
+  if (binStr.empty())
+    return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+
+  return binStr;
+}
+#else  // MLIR_XEVM_OCLOC_AVAILABLE
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
+                                        StringRef inputFormat) {
+  return getGPUModuleOp().emitError()
+         << "Native binary cannot be AOT compiled without ocloc.";
 }
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
 
 namespace {
 class SPIRVSerializer : public SerializeGPUModuleBase {

>From 52ad6d457c0d121e951189e67966721dcfe66984 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:06:09 +0000
Subject: [PATCH 2/5] Keep the llvm::sys::ExecuteAndWait based `ocloc` tool
 invocation as a fallback.

There might be a scenario where only the `ocloc` binary is available,
but not the library and include files. Use the fallback method
in that scenario.
---
 mlir/include/mlir/Target/LLVM/XeVM/Utils.h |  17 +-
 mlir/lib/Target/LLVM/XeVM/Target.cpp       | 214 +++++++++++++++++----
 2 files changed, 195 insertions(+), 36 deletions(-)

diff --git a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
index 02e766ff71cf5..4a33992004ab6 100644
--- a/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/XeVM/Utils.h
@@ -39,7 +39,7 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// Returns the gpu module being serialized.
   gpu::GPUModuleOp getGPUModuleOp();
 
-  /// Compiles to native code using `ocloc`.
+  /// Compiles to native code using `ocloc` (API or tool).
   FailureOr<SmallVector<char, 0>> compileToBinary(StringRef asmStr,
                                                   StringRef inputFormat);
 
@@ -51,6 +51,21 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// a Resource blob pointing to the LLVM bitcode in-memory.
   SmallVector<Attribute> librariesToLink;
 
+  /// Returns the path to the tool used for serialization.
+  std::optional<std::string> findTool(StringRef tool);
+
+  /// Compiles to native code using the `ocloc` command-line tool, communicating
+  /// through temporary files.
+  FailureOr<SmallVector<char, 0>>
+  compileToBinaryViaOclocTool(StringRef asmStr, StringRef inputFormat);
+
+  /// Compiles to native code using the `ocloc` shared library API, in-process,
+  /// without temporary files. Only available when the library is linked in.
+#if MLIR_XEVM_OCLOC_AVAILABLE
+  FailureOr<SmallVector<char, 0>>
+  compileToBinaryViaLibocloc(StringRef asmStr, StringRef inputFormat);
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
   /// GPU compilation target options.
   gpu::TargetOptions targetOptions;
 };
diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index ffdd4a26f4ecc..454b9f786fd4f 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -26,6 +26,13 @@
 #include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Target/TargetMachine.h"
 
 #include "llvm/ADT/ScopeExit.h"
@@ -106,15 +113,21 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
 // There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
 // - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
 // - `ocloc` tool can be "queried" from within MLIR.
+
+// ----------------------------------------------------------------------------
+// compile via the ocloc shared-library API (in-process, no temp files).  Only
+// compiled when the library is available at build time.
+// ----------------------------------------------------------------------------
 #if MLIR_XEVM_OCLOC_AVAILABLE
 FailureOr<SmallVector<char, 0>>
-SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
-                                        StringRef inputFormat) {
+SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
+                                                   StringRef inputFormat) {
   Location loc = getGPUModuleOp().getLoc();
   std::string asmFname = llvm::formatv(
       "mlir-{0}-{1}-{2}.asm", getGPUModuleOp().getNameAttr().getValue(),
       getTarget().getTriple(), getTarget().getChip());
-  // Set cmd options
+
+  // Build command-line options.
   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
       targetOptions.tokenizeCmdOptions();
   // Example: --gpu-module-to-binary="opts='opt1 opt2'"
@@ -139,66 +152,197 @@ SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
   });
 #undef DEBUG_TYPE
 
+  // Build a plain argv array expected by oclocInvoke.
   std::vector<const char *> argv;
   for (const auto &str : oclocArgs)
     argv.push_back(str.c_str());
 
-  uint32_t numSources = 1;
+  // Wire up in-memory source file.
   const uint8_t *dataSources[1] = {
       reinterpret_cast<const uint8_t *>(asmStr.data())};
   const uint64_t lenSources[1] = {asmStr.size()};
   const char *nameSources[1] = {asmFname.c_str()};
 
-  uint32_t outputs_num = 0;
+  uint32_t outputsNum = 0;
   uint8_t **outputs = nullptr;
-  uint64_t *output_length = nullptr;
-  char **output_names = nullptr;
-  auto _ = llvm::scope_exit([&]() {
-    oclocFreeOutput(&outputs_num, &outputs, &output_length, &output_names);
+  uint64_t *outputLengths = nullptr;
+  char **outputNames = nullptr;
+  // Ensure ocloc output buffers are always freed on exit.
+  auto freeOutputs = llvm::scope_exit([&]() {
+    oclocFreeOutput(&outputsNum, &outputs, &outputLengths, &outputNames);
   });
 
   int err = oclocInvoke(static_cast<uint32_t>(argv.size()), argv.data(),
-                        numSources, dataSources, lenSources, nameSources, 0,
-                        nullptr, nullptr, nullptr, &outputs_num, &outputs,
-                        &output_length, &output_names);
+                        /*numSources=*/1, dataSources, lenSources, nameSources,
+                        /*numHeaders=*/0, nullptr, nullptr, nullptr,
+                        &outputsNum, &outputs, &outputLengths, &outputNames);
 
   if (err != OCLOC_SUCCESS) {
-    emitError(loc) << "`oclocInvoke` failed or produced no output, error: "
-                   << err;
-    for (uint32_t i = 0; i < outputs_num; ++i) {
-      if (llvm::StringRef(output_names[i]).ends_with(".log")) {
-        emitError(loc) << "Compiler log:\n";
-        emitError(loc) << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
-                                          output_length[i])
-                       << "\n";
-      }
+    emitError(loc) << "`oclocInvoke` failed, error code: " << err;
+    // Emit any compiler log that ocloc produced.
+    for (uint32_t i = 0; i < outputsNum; ++i) {
+      if (llvm::StringRef(outputNames[i]).ends_with(".log"))
+        emitError(loc) << "Compiler log:\n"
+                       << llvm::StringRef(reinterpret_cast<char *>(outputs[i]),
+                                          outputLengths[i]);
     }
     return failure();
   }
 
-  SmallVector<char, 0> binStr;
-  for (uint32_t i = 0; i < outputs_num; ++i) {
-    if (llvm::StringRef(output_names[i]).ends_with(".bin")) {
-      char *outBegin = reinterpret_cast<char *>(outputs[i]);
-      char *outEnd = outBegin + output_length[i];
-      binStr.assign(outBegin, outEnd);
-      break;
+  // Find and return the .bin output.
+  for (uint32_t i = 0; i < outputsNum; ++i) {
+    if (llvm::StringRef(outputNames[i]).ends_with(".bin")) {
+      char *begin = reinterpret_cast<char *>(outputs[i]);
+      return SmallVector<char, 0>(begin, begin + outputLengths[i]);
+    }
+  }
+  return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
+}
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Compile by spawning the `ocloc` command-line tool as a process,
+// communicating through temporary files.  Acts as a fallback when the shared
+// library is not available.
+// ----------------------------------------------------------------------------
+FailureOr<SmallVector<char, 0>>
+SerializeGPUModuleBase::compileToBinaryViaOclocTool(StringRef asmStr,
+                                                    StringRef inputFormat) {
+  using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
+
+  // Locate the `ocloc` executable on PATH.
+  std::optional<std::string> oclocPath = findTool("ocloc");
+  if (!oclocPath) {
+    emitError(getGPUModuleOp().getLoc()) << "Could not find `ocloc` on PATH";
+    return failure();
+  }
+
+  Location loc = getGPUModuleOp().getLoc();
+  std::string basename = llvm::formatv(
+      "mlir-{0}-{1}-{2}", getGPUModuleOp().getNameAttr().getValue(),
+      getTarget().getTriple(), getTarget().getChip());
+
+  // Helper: create a named temporary file, returning path + auto-remover.
+  auto createTemp = [&](StringRef name,
+                        StringRef suffix) -> std::optional<TmpFile> {
+    llvm::SmallString<128> path;
+    if (auto ec = llvm::sys::fs::createTemporaryFile(name, suffix, path)) {
+      emitError(loc) << "Couldn't create temp file `" << path
+                     << "`: " << ec.message();
+      return std::nullopt;
+    }
+    return TmpFile(path, llvm::FileRemover(path.c_str()));
+  };
+
+  std::optional<TmpFile> asmFile = createTemp(basename, "asm");
+  std::optional<TmpFile> binFile = createTemp(basename, "");
+  std::optional<TmpFile> logFile = createTemp(basename, "log");
+  if (!asmFile || !binFile || !logFile)
+    return failure();
+
+  // Write the assembly source to a temp file.
+  {
+    std::error_code ec;
+    llvm::raw_fd_ostream asmStream(asmFile->first, ec);
+    if (ec) {
+      emitError(loc) << "Couldn't open `" << asmFile->first
+                     << "`: " << ec.message();
+      return failure();
+    }
+    asmStream << asmStr;
+    if (asmStream.has_error()) {
+      emitError(loc) << "Error writing assembly to `" << asmFile->first << "`";
+      return failure();
+    }
+    asmStream.flush();
+  }
+
+  // Build command-line options.
+  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+      targetOptions.tokenizeCmdOptions();
+  const std::string cmdOptsStr = "\"" + llvm::join(cmdOpts.second, " ") + "\"";
+
+  SmallVector<StringRef, 12> oclocArgs(
+      {"ocloc", "compile", "-file", asmFile->first, inputFormat, "-device",
+       getTarget().getChip(), "-output", binFile->first, "-output_no_suffix",
+       "-options", cmdOptsStr});
+
+  // Dump tool invocation commands.
+#define DEBUG_TYPE "serialize-to-binary"
+  LLVM_DEBUG({
+    llvm::dbgs() << "Tool invocation for module: "
+                 << getGPUModuleOp().getNameAttr() << "\n";
+    llvm::interleave(oclocArgs, llvm::dbgs(), " ");
+    llvm::dbgs() << "\n";
+  });
+#undef DEBUG_TYPE
+
+  // Redirect stdout/stderr to the log temp file.
+  std::optional<StringRef> redirects[] = {std::nullopt, logFile->first,
+                                          logFile->first};
+
+  std::string errorMsg;
+  if (llvm::sys::ExecuteAndWait(*oclocPath, oclocArgs, std::nullopt, redirects,
+                                0, 0, &errorMsg)) {
+    // Prefer a structured error message; otherwise dump the log file.
+    if (!errorMsg.empty()) {
+      emitError(loc) << "`ocloc` invocation failed: " << errorMsg;
+    } else if (auto log = llvm::MemoryBuffer::getFile(logFile->first)) {
+      emitError(loc) << "`ocloc` invocation failed. Log:\n"
+                     << (*log)->getBuffer();
+    } else {
+      emitError(loc) << "`ocloc` invocation failed (no log available)";
     }
+    return failure();
   }
-  if (binStr.empty())
-    return emitError(loc) << "`oclocInvoke` did not produce `.bin` output";
 
-  return binStr;
+  // Read back the binary output (ocloc appends ".bin" to the base name).
+  binFile->first.append(".bin");
+  auto binaryBuffer = llvm::MemoryBuffer::getFile(binFile->first);
+  if (!binaryBuffer) {
+    emitError(loc) << "Couldn't open binary output `" << binFile->first
+                   << "`: " << binaryBuffer.getError().message();
+    return failure();
+  }
+  StringRef bin = (*binaryBuffer)->getBuffer();
+  return SmallVector<char, 0>(bin.begin(), bin.end());
 }
-#else  // MLIR_XEVM_OCLOC_AVAILABLE
+
+// ----------------------------------------------------------------------------
+// Public entry-point: prefer the in-process library path; fall back to the
+// external tool when the library is not available.
+// ----------------------------------------------------------------------------
 FailureOr<SmallVector<char, 0>>
 SerializeGPUModuleBase::compileToBinary(StringRef asmStr,
                                         StringRef inputFormat) {
-  return getGPUModuleOp().emitError()
-         << "Native binary cannot be AOT compiled without ocloc.";
+#if MLIR_XEVM_OCLOC_AVAILABLE
+  return compileToBinaryViaLibocloc(asmStr, inputFormat);
+#else
+  return compileToBinaryViaOclocTool(asmStr, inputFormat);
+#endif
 }
-#endif // MLIR_XEVM_OCLOC_AVAILABLE
 
+std::optional<std::string> SerializeGPUModuleBase::findTool(StringRef tool) {
+  // 1. Check the toolkit path given in the command line.
+  StringRef pathRef = targetOptions.getToolkitPath();
+  SmallVector<char, 256> path;
+  if (!pathRef.empty()) {
+    path.insert(path.begin(), pathRef.begin(), pathRef.end());
+    llvm::sys::path::append(path, "bin", tool);
+    if (llvm::sys::fs::can_execute(path))
+      return StringRef(path.data(), path.size()).str();
+  }
+  // 2. Check PATH.
+  if (std::optional<std::string> toolPath =
+          llvm::sys::Process::FindInEnvPath("PATH", tool))
+    return *toolPath;
+
+  getGPUModuleOp().emitError()
+      << "Couldn't find the `" << tool
+      << "` binary. Please specify the toolkit "
+         "path via GpuModuleToBinaryPass or add the compiler to $PATH`.";
+  return std::nullopt;
+}
 namespace {
 class SPIRVSerializer : public SerializeGPUModuleBase {
 public:

>From cd3edb197827ea9cc9bad2a2cc8ce2ebd1dce6c3 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:11:05 +0000
Subject: [PATCH 3/5] Consolidate header files.

---
 mlir/lib/Target/LLVM/XeVM/Target.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 454b9f786fd4f..0275c2867bb6e 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Target/LLVM/XeVM/Target.h"
-
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/XeVMDialect.h"
@@ -25,6 +24,9 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Config/Targets.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
@@ -33,14 +35,9 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/Config/Targets.h"
-#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 #if MLIR_XEVM_OCLOC_AVAILABLE
 #include <ocloc_api.h>

>From e5a2631a338377df026502483631264c20707d00 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Sat, 28 Mar 2026 02:15:08 +0000
Subject: [PATCH 4/5] Fix a comment.

---
 mlir/lib/Target/LLVM/XeVM/Target.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 0275c2867bb6e..5777796a75f58 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -106,11 +106,6 @@ gpu::GPUModuleOp SerializeGPUModuleBase::getGPUModuleOp() {
   return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
 }
 
-// There is 1 way to finalize IL to native code: IGC
-// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
-// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
-// - `ocloc` tool can be "queried" from within MLIR.
-
 // ----------------------------------------------------------------------------
 // compile via the ocloc shared-library API (in-process, no temp files).  Only
 // compiled when the library is available at build time.
@@ -397,6 +392,10 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg,
 } // namespace llvm
 #endif
 
+// There is 1 way to finalize IL to native code: IGC
+// There are 2 ways to access IGC: AOT (ocloc) and JIT (L0 runtime).
+// - L0 runtime consumes IL and is external to MLIR codebase (rt wrappers).
+// - `ocloc` tool can be "queried" from within MLIR.
 FailureOr<SmallVector<char, 0>>
 SPIRVSerializer::moduleToObject(llvm::Module &llvmModule) {
 #define DEBUG_TYPE "serialize-to-llvm"

>From cf6a1d3821ab0e50da7551b371674d7c91c04859 Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Thu, 9 Apr 2026 17:57:24 +0000
Subject: [PATCH 5/5] Address review comments.

Replace the usage of `ocloc_api.h` with external declarations
of APIs.

Update error messages to differentiate between ocloc and libocloc.
---
 mlir/lib/Target/LLVM/XeVM/Target.cpp | 43 ++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Target/LLVM/XeVM/Target.cpp b/mlir/lib/Target/LLVM/XeVM/Target.cpp
index 5777796a75f58..1e707d16cde45 100644
--- a/mlir/lib/Target/LLVM/XeVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/XeVM/Target.cpp
@@ -39,16 +39,47 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 
-#if MLIR_XEVM_OCLOC_AVAILABLE
-#include <ocloc_api.h>
-#endif // MLIR_XEVM_OCLOC_AVAILABLE
-
 #include <cstdint>
 #include <cstdlib>
 
 using namespace mlir;
 using namespace mlir::xevm;
 
+#if MLIR_XEVM_OCLOC_AVAILABLE
+// The Intel compute runtime ships libocloc in its distribution, but not
+// <ocloc_api.h>. Hence forward declarations of the ocloc shared-library
+// APIs are needed. These replace the inclusion of <ocloc_api.h> so that the
+// header is not a build-time requirement; the symbols are resolved at
+// link/load time via the ocloc shared library.
+extern "C" {
+
+// Return code indicating successful ocloc compilation.
+// Matches the OCLOC_SUCCESS enumerator in the real header (value 0).
+enum OclocErrorCode : int { OCLOC_SUCCESS = 0 };
+
+// Drives an in-process ocloc compilation.
+// argv / numArgs  – standard ocloc command-line arguments.
+// numSources      – number of in-memory source files (typically 1).
+// dataSources     – array of pointers to source byte buffers.
+// lenSources      – byte length of each source buffer.
+// nameSources     – file name associated with each source buffer.
+// numHeaders / dataHeaders / lenHeaders / nameHeaders – optional headers.
+// numOutputs / dataOutputs / lenOutputs / nameOutputs – [out] results.
+int oclocInvoke(unsigned numArgs, const char **argv, unsigned numSources,
+                const uint8_t **dataSources, const uint64_t *lenSources,
+                const char **nameSources, unsigned numHeaders,
+                const uint8_t **dataHeaders, const uint64_t *lenHeaders,
+                const char **nameHeaders, unsigned *numOutputs,
+                uint8_t ***dataOutputs, uint64_t **lenOutputs,
+                char ***nameOutputs);
+
+// Releases output buffers previously populated by oclocInvoke.
+int oclocFreeOutput(unsigned *numOutputs, uint8_t ***dataOutputs,
+                    uint64_t **lenOutputs, char ***nameOutputs);
+
+} // extern "C"
+#endif // MLIR_XEVM_OCLOC_AVAILABLE
+
 namespace {
 // XeVM implementation of the gpu:TargetAttrInterface.
 class XeVMTargetAttrImpl
@@ -137,7 +168,7 @@ SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
 // Dump tool invocation commands.
 #define DEBUG_TYPE "serialize-to-binary"
   LLVM_DEBUG({
-    llvm::dbgs() << "Tool invocation for module: "
+    llvm::dbgs() << "libocloc invocation for module: "
                  << getGPUModuleOp().getNameAttr() << "\n";
     llvm::interleave(oclocArgs, llvm::dbgs(), " ");
     llvm::dbgs() << "\n";
@@ -170,7 +201,7 @@ SerializeGPUModuleBase::compileToBinaryViaLibocloc(StringRef asmStr,
                         &outputsNum, &outputs, &outputLengths, &outputNames);
 
   if (err != OCLOC_SUCCESS) {
-    emitError(loc) << "`oclocInvoke` failed, error code: " << err;
+    emitError(loc) << "libocloc: `oclocInvoke` failed, error code: " << err;
     // Emit any compiler log that ocloc produced.
     for (uint32_t i = 0; i < outputsNum; ++i) {
       if (llvm::StringRef(outputNames[i]).ends_with(".log"))



More information about the Mlir-commits mailing list