[Mlir-commits] [mlir] [mlir][gpu] Extend `mgpumoduleLoadJIT` API to add assemblySize parameter (PR #189429)

Md Abdullah Shahneous Bari llvmlistbot at llvm.org
Mon Mar 30 09:47:38 PDT 2026


https://github.com/mshahneo created https://github.com/llvm/llvm-project/pull/189429

When JITing SPIR-V through the LevelZero API, the module loader needs the length of the input, since the data is passed as a `void *`. The problem is that the length cannot be recovered with something like `strlen(reinterpret_cast<char *>(data))` in the `mgpuModuleLoadJIT` implementation, because the SPIR-V binary contains null bytes (i.e., the data is binary SPIR-V, not null-terminated text).

As a result, we need to pass the `assemblySize` explicitly via `mgpuModuleLoadJIT(void *data, int optLevel, size_t assemblySize)`.

>From f2aeeab710137ab11a5e2ea3e499d7c92da2565e Mon Sep 17 00:00:00 2001
From: "Shahneous Bari, Md Abdullah" <md.abdullah.shahneous.bari at intel.com>
Date: Mon, 30 Mar 2026 16:37:45 +0000
Subject: [PATCH] [mlir][gpu] Extend `mgpumoduleLoadJIT` API to add
 assemblySize parameter

When JITing SPIR-V through the LevelZero API, the module loader needs
the length of the input, since the data is passed as a `void *`.
The problem is that the length cannot be recovered with something like
`strlen(reinterpret_cast<char *>(data))` in the `mgpuModuleLoadJIT`
implementation, because the SPIR-V binary contains null bytes
(i.e., the data is binary SPIR-V, not null-terminated text).

As a result, we need to pass the `assemblySize` explicitly via
`mgpuModuleLoadJIT(void *data, int optLevel, size_t assemblySize)`.
---
 .../ExecutionEngine/CudaRuntimeWrappers.cpp   |  4 +--
 .../LevelZeroRuntimeWrappers.cpp              |  6 ++--
 .../ExecutionEngine/RocmRuntimeWrappers.cpp   |  3 +-
 .../Target/LLVMIR/Dialect/GPU/CMakeLists.txt  |  1 +
 .../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp   | 30 +++++++++++++++----
 5 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index f203363e16ea2..6307e0b59f3d2 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -124,8 +124,8 @@ mgpuModuleLoad(void *data, size_t /*gpuBlobSize*/) {
   return module;
 }
 
-extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule mgpuModuleLoadJIT(void *data,
-                                                                int optLevel) {
+extern "C" MLIR_CUDA_WRAPPERS_EXPORT CUmodule
+mgpuModuleLoadJIT(void *data, int optLevel, size_t /*assmeblySize*/) {
   ScopedContext scopedContext;
   CUmodule module = nullptr;
   char jitErrorBuffer[4096] = {0};
diff --git a/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp
index 01965da038820..75e997ead233d 100644
--- a/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp
@@ -520,10 +520,10 @@ extern "C" ze_module_handle_t mgpuModuleLoad(const void *data,
   return catchAll([&]() { return loadModule(data, gpuBlobSize); });
 }
 
-extern "C" ze_module_handle_t mgpuModuleLoadJIT(void *data, int optLevel) {
+extern "C" ze_module_handle_t mgpuModuleLoadJIT(void *data, int optLevel,
+                                                size_t assemblySize) {
   return catchAll([&]() {
-    return loadModule(data, strlen(reinterpret_cast<char *>(data)),
-                      ZE_MODULE_FORMAT_IL_SPIRV);
+    return loadModule(data, assemblySize, ZE_MODULE_FORMAT_IL_SPIRV);
   });
 }
 
diff --git a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
index b984149ca6dea..e729e4f9fca9d 100644
--- a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
@@ -38,7 +38,8 @@ extern "C" hipModule_t mgpuModuleLoad(void *data, size_t /*gpuBlobSize*/) {
   return module;
 }
 
-extern "C" hipModule_t mgpuModuleLoadJIT(void *data, int optLevel) {
+extern "C" hipModule_t mgpuModuleLoadJIT(void *data, int optLevel,
+                                         size_t /*assmeblySize*/) {
   assert(false && "This function is not available in HIP.");
   return nullptr;
 }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt
index 11816ff5c2c1f..dd54dc10e1abe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt
@@ -9,6 +9,7 @@ add_mlir_translation_library(MLIRGPUToLLVMIRTranslation
   MLIRIR
   MLIRGPUDialect
   MLIRLLVMDialect
+  MLIRXeVMTarget
   MLIRSupport
   MLIRTargetLLVMIRExport
   )
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 85fac86743a13..8e31a6e26fc4d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -13,7 +13,10 @@
 
 #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
 
+#include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h"
+#include "mlir/Target/LLVM/XeVM/Target.h"
 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
@@ -98,8 +101,15 @@ static LogicalResult embedBinaryImpl(StringRef moduleName,
 
   // Embed the object as a global string.
   // Add null for assembly output for JIT paths that expect null-terminated
-  // strings.
-  bool addNull = (object.getFormat() == gpu::CompilationTarget::Assembly);
+  // strings. SPIR-V (for both XeVM and SPIR-V target) is passed as a binary
+  // blob and should not have a null terminator.
+  auto xevmTarget = dyn_cast<xevm::XeVMTargetAttr>(object.getTarget());
+  bool isXeVMSPIRV =
+      xevmTarget && xevmTarget.getTriple() == "spirv64-unknown-unknown";
+  bool isSPIRVTarget = isa<spirv::TargetEnvAttr>(object.getTarget());
+
+  bool addNull = !(isXeVMSPIRV || isSPIRVTarget) &&
+                 (object.getFormat() == gpu::CompilationTarget::Assembly);
   StringRef serializedStr = object.getObject().getValue();
   Constant *serializedCst =
       ConstantDataArray::getString(module.getContext(), serializedStr, addNull);
@@ -142,16 +152,24 @@ static LogicalResult embedBinaryImpl(StringRef moduleName,
   auto *loadBlock = BasicBlock::Create(module.getContext(), "entry", loadFn);
   builder.SetInsertPoint(loadBlock);
   Value *moduleObj = [&] {
+    Constant *binarySize =
+        ConstantInt::get(i64Ty, serializedStr.size() + (addNull ? 1 : 0));
     if (object.getFormat() == gpu::CompilationTarget::Assembly) {
       FunctionCallee moduleLoadFn = module.getOrInsertFunction(
-          "mgpuModuleLoadJIT", FunctionType::get(ptrTy, {ptrTy, i32Ty}, false));
+          "mgpuModuleLoadJIT", FunctionType::get(ptrTy,
+                                                 {
+                                                     ptrTy,
+                                                     i32Ty,
+                                                     i64Ty,
+                                                 },
+                                                 false));
+
       Constant *optValue = ConstantInt::get(i32Ty, optLevel);
-      return builder.CreateCall(moduleLoadFn, {serializedObj, optValue});
+      return builder.CreateCall(moduleLoadFn,
+                                {serializedObj, optValue, binarySize});
     }
     FunctionCallee moduleLoadFn = module.getOrInsertFunction(
         "mgpuModuleLoad", FunctionType::get(ptrTy, {ptrTy, i64Ty}, false));
-    Constant *binarySize =
-        ConstantInt::get(i64Ty, serializedStr.size() + (addNull ? 1 : 0));
     return builder.CreateCall(moduleLoadFn, {serializedObj, binarySize});
   }();
   builder.CreateStore(moduleObj, modulePtr);



More information about the Mlir-commits mailing list