[Mlir-commits] [mlir] [MLIR] Split ExecutionEngine Initialization out of ctor into an explicit method call (PR #153524)

Shenghang Tsai llvmlistbot at llvm.org
Fri Aug 15 03:02:53 PDT 2025


https://github.com/jackalcooper updated https://github.com/llvm/llvm-project/pull/153524

>From 975a39e9d5f9a89a3bcbcdea87221500c9365eec Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 08:22:51 +0800
Subject: [PATCH 01/12] port changes from last PR

---
 mlir/include/mlir-c/ExecutionEngine.h         |  7 +++
 .../mlir/ExecutionEngine/ExecutionEngine.h    |  9 +++
 .../Bindings/Python/ExecutionEngineModule.cpp | 13 ++++-
 .../CAPI/ExecutionEngine/ExecutionEngine.cpp  |  9 ++-
 mlir/lib/ExecutionEngine/ExecutionEngine.cpp  | 20 ++++---
 mlir/lib/ExecutionEngine/JitRunner.cpp        |  2 +
 mlir/test/CAPI/execution_engine.c             | 55 +++++++++++++++++++
 mlir/unittests/ExecutionEngine/Invoke.cpp     | 38 +++++++++++++
 8 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/mlir/include/mlir-c/ExecutionEngine.h b/mlir/include/mlir-c/ExecutionEngine.h
index 99cddc5c2598d..1a58d68533f24 100644
--- a/mlir/include/mlir-c/ExecutionEngine.h
+++ b/mlir/include/mlir-c/ExecutionEngine.h
@@ -46,6 +46,13 @@ MLIR_CAPI_EXPORTED MlirExecutionEngine mlirExecutionEngineCreate(
     MlirModule op, int optLevel, int numPaths,
     const MlirStringRef *sharedLibPaths, bool enableObjectDump);
 
+/// Initialize the ExecutionEngine. Global constructors specified by
+/// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel
+/// binary compiled from `gpu.module` gets loaded during initialization. Make
+/// sure all symbols are resolvable before initialization by calling
+/// `mlirExecutionEngineRegisterSymbol` or including shared libraries.
+MLIR_CAPI_EXPORTED void mlirExecutionEngineInitialize(MlirExecutionEngine jit);
+
 /// Destroy an ExecutionEngine instance.
 MLIR_CAPI_EXPORTED void mlirExecutionEngineDestroy(MlirExecutionEngine jit);
 
diff --git a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
index 96ccebcd5685e..5bd71d68d253a 100644
--- a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
+++ b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h
@@ -227,6 +227,13 @@ class ExecutionEngine {
       llvm::function_ref<llvm::orc::SymbolMap(llvm::orc::MangleAndInterner)>
           symbolMap);
 
+  /// Initialize the ExecutionEngine. Global constructors specified by
+  /// `llvm.mlir.global_ctors` will be run. One common scenario is that kernel
+  /// binary compiled from `gpu.module` gets loaded during initialization. Make
+  /// sure all symbols are resolvable before initialization by calling
+  /// `registerSymbols` or including shared libraries.
+  void initialize();
+
 private:
   /// Ordering of llvmContext and jit is important for destruction purposes: the
   /// jit must be destroyed before the context.
@@ -250,6 +257,8 @@ class ExecutionEngine {
   /// Destroy functions in the libraries loaded by the ExecutionEngine that are
   /// called when this ExecutionEngine is destructed.
   SmallVector<LibraryDestroyFn> destroyFns;
+
+  bool isInitialized = false;
 };
 
 } // namespace mlir
diff --git a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
index 81dada3553622..4f7a4a628e246 100644
--- a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
+++ b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir-c/ExecutionEngine.h"
-#include "mlir/Bindings/Python/NanobindAdaptors.h"
 #include "mlir/Bindings/Python/Nanobind.h"
+#include "mlir/Bindings/Python/NanobindAdaptors.h"
 
 namespace nb = nanobind;
 using namespace mlir;
@@ -124,6 +124,17 @@ NB_MODULE(_mlirExecutionEngine, m) {
           },
           nb::arg("name"), nb::arg("callback"),
           "Register `callback` as the runtime symbol `name`.")
+      .def(
+          "initialize",
+          [](PyExecutionEngine &executionEngine) {
+            mlirExecutionEngineInitialize(executionEngine.get());
+          },
+          "Initialize the ExecutionEngine. Global constructors specified by "
+          "`llvm.mlir.global_ctors` will be run. One common scenario is that "
+          "kernel binary compiled from `gpu.module` gets loaded during "
+          "initialization. Make sure all symbols are resolvable before "
+          "initialization by calling `raw_register_runtime` or including "
+          "shared libraries.")
       .def(
           "dump_to_object_file",
           [](PyExecutionEngine &executionEngine, const std::string &fileName) {
diff --git a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp
index 306cebd236be9..2dbb993b1640f 100644
--- a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp
+++ b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp
@@ -68,6 +68,10 @@ mlirExecutionEngineCreate(MlirModule op, int optLevel, int numPaths,
   return wrap(jitOrError->release());
 }
 
+extern "C" void mlirExecutionEngineInitialize(MlirExecutionEngine jit) {
+  unwrap(jit)->initialize();
+}
+
 extern "C" void mlirExecutionEngineDestroy(MlirExecutionEngine jit) {
   delete (unwrap(jit));
 }
@@ -106,9 +110,8 @@ extern "C" void mlirExecutionEngineRegisterSymbol(MlirExecutionEngine jit,
                                                   void *sym) {
   unwrap(jit)->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
     llvm::orc::SymbolMap symbolMap;
-    symbolMap[interner(unwrap(name))] =
-        { llvm::orc::ExecutorAddr::fromPtr(sym),
-          llvm::JITSymbolFlags::Exported };
+    symbolMap[interner(unwrap(name))] = {llvm::orc::ExecutorAddr::fromPtr(sym),
+                                         llvm::JITSymbolFlags::Exported};
     return symbolMap;
   });
 }
diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
index f704fbfbe8fff..52162a43aeae3 100644
--- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -106,7 +106,7 @@ void ExecutionEngine::dumpToObjectFile(StringRef filename) {
   }
   // Compilation is lazy and it doesn't populate object cache unless requested.
   // In case object dump is requested before cache is populated, we need to
-  // force compilation manually. 
+  // force compilation manually.
   if (cache->isEmpty()) {
     for (std::string &functionName : functionNames) {
       auto result = lookupPacked(functionName);
@@ -400,13 +400,6 @@ ExecutionEngine::create(Operation *m, const ExecutionEngineOptions &options,
     return symbolMap;
   };
   engine->registerSymbols(runtimeSymbolMap);
-
-  // Execute the global constructors from the module being processed.
-  // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
-  // crash for AArch64 see related issue #71963.
-  if (!engine->jit->getTargetTriple().isAArch64())
-    cantFail(engine->jit->initialize(engine->jit->getMainJITDylib()));
-
   return std::move(engine);
 }
 
@@ -442,6 +435,7 @@ Expected<void *> ExecutionEngine::lookup(StringRef name) const {
 
 Error ExecutionEngine::invokePacked(StringRef name,
                                     MutableArrayRef<void *> args) {
+  initialize();
   auto expectedFPtr = lookupPacked(name);
   if (!expectedFPtr)
     return expectedFPtr.takeError();
@@ -451,3 +445,13 @@ Error ExecutionEngine::invokePacked(StringRef name,
 
   return Error::success();
 }
+
+void ExecutionEngine::initialize() {
+  if (isInitialized)
+    return;
+  // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
+  // crash for AArch64 see related issue #71963.
+  if (!jit->getTargetTriple().isAArch64())
+    cantFail(jit->initialize(jit->getMainJITDylib()));
+  isInitialized = true;
+}
diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp
index 2107df37d1997..0ada4cc96570a 100644
--- a/mlir/lib/ExecutionEngine/JitRunner.cpp
+++ b/mlir/lib/ExecutionEngine/JitRunner.cpp
@@ -202,6 +202,8 @@ compileAndExecute(Options &options, Operation *module, StringRef entryPoint,
 
   auto engine = std::move(*expectedEngine);
 
+  engine->initialize();
+
   auto expectedFPtr = engine->lookupPacked(entryPoint);
   if (!expectedFPtr)
     return expectedFPtr.takeError();
diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 4751288c3ee4b..900588aeea2da 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -137,6 +137,60 @@ void testOmpCreation(void) {
   mlirContextDestroy(ctx);
 }
 
+// Helper variable to track callback invocations
+static int initCnt = 0;
+
+// Callback function that will be called during JIT initialization
+static void initCallback(void) { initCnt += 1; }
+
+// CHECK-LABEL: Running test 'testGlobalCtorJitCallback'
+void testGlobalCtorJitCallback(void) {
+  MlirContext ctx = mlirContextCreate();
+  registerAllUpstreamDialects(ctx);
+
+  // Create module with global constructor that calls our callback
+  MlirModule module = mlirModuleCreateParse(
+      ctx, mlirStringRefCreateFromCString(
+               // clang-format off
+"module {                                                                       \n"
+"  llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] \n"
+"  llvm.func @ctor() {                                                          \n"
+"    func.call @init_callback() : () -> ()                                      \n"
+"    llvm.return                                                                \n"
+"  }                                                                            \n"
+"  func.func private @init_callback() attributes { llvm.emit_c_interface }      \n"
+"}                                                                              \n"
+               // clang-format on
+               ));
+
+  lowerModuleToLLVM(ctx, module);
+  mlirRegisterAllLLVMTranslations(ctx);
+
+  // Create execution engine with initialization disabled
+  MlirExecutionEngine jit = mlirExecutionEngineCreate(
+      module, /*optLevel=*/2, /*numPaths=*/0, /*sharedLibPaths=*/NULL,
+      /*enableObjectDump=*/false);
+
+  if (mlirExecutionEngineIsNull(jit)) {
+    fprintf(stderr, "Execution engine creation failed");
+    exit(2);
+  }
+
+  // Register callback symbol before initialization
+  mlirExecutionEngineRegisterSymbol(
+      jit, mlirStringRefCreateFromCString("_mlir_ciface_init_callback"),
+      (void *)(uintptr_t)initCallback);
+
+  mlirExecutionEngineInitialize(jit);
+
+  // CHECK: Init count: 1
+  printf("Init count: %d\n", initCnt);
+
+  mlirExecutionEngineDestroy(jit);
+  mlirModuleDestroy(module);
+  mlirContextDestroy(ctx);
+}
+
 int main(void) {
 
 #define _STRINGIFY(x) #x
@@ -147,5 +201,6 @@ int main(void) {
 
   TEST(testSimpleExecution);
   TEST(testOmpCreation);
+  TEST(testGlobalCtorJitCallback);
   return 0;
 }
diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 312b10f28143f..712d4c49a2afe 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -322,4 +322,42 @@ TEST(NativeMemRefJit, MAYBE_JITCallback) {
     ASSERT_EQ(elt, coefficient * count++);
 }
 
+static int initCnt = 0;
+// A helper function that will be called during the JIT's initialization.
+static void initCallback() { initCnt += 1; }
+
+TEST(GlobalCtorJit, MAYBE_JITCallback) {
+  std::string moduleStr = R"mlir(
+  llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
+  llvm.func @ctor() {
+    func.call @init_callback() : () -> ()
+    llvm.return
+  }
+  func.func private @init_callback() attributes { llvm.emit_c_interface }
+  )mlir";
+
+  DialectRegistry registry;
+  registerAllDialects(registry);
+  registerBuiltinDialectTranslation(registry);
+  registerLLVMDialectTranslation(registry);
+  MLIRContext context(registry);
+  auto module = parseSourceString<ModuleOp>(moduleStr, &context);
+  ASSERT_TRUE(!!module);
+  ASSERT_TRUE(succeeded(lowerToLLVMDialect(*module)));
+  ExecutionEngineOptions jitOptions;
+  auto jitOrError = ExecutionEngine::create(*module, jitOptions);
+  ASSERT_TRUE(!!jitOrError);
+  auto jit = std::move(jitOrError.get());
+  // Define any extra symbols so they're available at initialization.
+  jit->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
+    llvm::orc::SymbolMap symbolMap;
+    symbolMap[interner("_mlir_ciface_init_callback")] = {
+        llvm::orc::ExecutorAddr::fromPtr(initCallback),
+        llvm::JITSymbolFlags::Exported};
+    return symbolMap;
+  });
+  jit->initialize();
+  ASSERT_EQ(initCnt, 1);
+}
+
 #endif // _WIN32

>From 793e83a840d5055fb87cc0c7dd12a393e48b6628 Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 08:42:40 +0800
Subject: [PATCH 02/12] update GPU docs to reflect latest impl

---
 mlir/docs/Dialects/GPU.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index 94b053daa1615..3578f430ec486 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -193,10 +193,18 @@ llvm.func @foo() {
 // mlir-translate --mlir-to-llvmir:
 @binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8
 @binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1
+@module = internal global ptr null
+@llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}]
+@llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}]
+define internal void @binary_load() section ".text.startup" {
+entry:
+  %0 = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
+  store ptr %0, ptr @module
+  ...
+}
 ...
 define void @foo() {
   ...
-  %module = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
   %kernel = call ptr @mgpuModuleGetFunction(ptr %module, ptr @binary_func_kernel_name)
   call void @mgpuLaunchKernel(ptr %kernel, ...) ; Launch the kernel
   ...

>From e948795c900104a28e8cb51c98a45a8fbe69f622 Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 08:47:08 +0800
Subject: [PATCH 03/12] add module symbol->ptr load

---
 mlir/docs/Dialects/GPU.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index 3578f430ec486..a06183ad39dce 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -193,7 +193,7 @@ llvm.func @foo() {
 // mlir-translate --mlir-to-llvmir:
 @binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8
 @binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1
-@module = internal global ptr null
+@binary_@module = internal global ptr null
 @llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}]
 @llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}]
 define internal void @binary_load() section ".text.startup" {
@@ -205,6 +205,7 @@ entry:
 ...
 define void @foo() {
   ...
+  %module = load ptr, ptr @binary_module, align 8
   %kernel = call ptr @mgpuModuleGetFunction(ptr %module, ptr @binary_func_kernel_name)
   call void @mgpuLaunchKernel(ptr %kernel, ...) ; Launch the kernel
   ...

>From fe4d555d7b6bdc8c379416aa3ca2ab26b89b568c Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 09:11:10 +0800
Subject: [PATCH 04/12] __has_feature(address_sanitizer)

---
 mlir/unittests/ExecutionEngine/Invoke.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 712d4c49a2afe..566c587957992 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -326,6 +326,11 @@ static int initCnt = 0;
 // A helper function that will be called during the JIT's initialization.
 static void initCallback() { initCnt += 1; }
 
+#if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer)
+#define MAYBE_JITCallback DISABLED_JITCallback
+#else
+#define MAYBE_JITCallback SKIP_WITHOUT_JIT(JITCallback)
+#endif
 TEST(GlobalCtorJit, MAYBE_JITCallback) {
   std::string moduleStr = R"mlir(
   llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]

>From 0a55a9b4070e2eaa039272587daae1ed0390265b Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 09:25:55 +0800
Subject: [PATCH 05/12] remove test in C

---
 mlir/test/CAPI/execution_engine.c | 55 -------------------------------
 1 file changed, 55 deletions(-)

diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 900588aeea2da..4751288c3ee4b 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -137,60 +137,6 @@ void testOmpCreation(void) {
   mlirContextDestroy(ctx);
 }
 
-// Helper variable to track callback invocations
-static int initCnt = 0;
-
-// Callback function that will be called during JIT initialization
-static void initCallback(void) { initCnt += 1; }
-
-// CHECK-LABEL: Running test 'testGlobalCtorJitCallback'
-void testGlobalCtorJitCallback(void) {
-  MlirContext ctx = mlirContextCreate();
-  registerAllUpstreamDialects(ctx);
-
-  // Create module with global constructor that calls our callback
-  MlirModule module = mlirModuleCreateParse(
-      ctx, mlirStringRefCreateFromCString(
-               // clang-format off
-"module {                                                                       \n"
-"  llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero] \n"
-"  llvm.func @ctor() {                                                          \n"
-"    func.call @init_callback() : () -> ()                                      \n"
-"    llvm.return                                                                \n"
-"  }                                                                            \n"
-"  func.func private @init_callback() attributes { llvm.emit_c_interface }      \n"
-"}                                                                              \n"
-               // clang-format on
-               ));
-
-  lowerModuleToLLVM(ctx, module);
-  mlirRegisterAllLLVMTranslations(ctx);
-
-  // Create execution engine with initialization disabled
-  MlirExecutionEngine jit = mlirExecutionEngineCreate(
-      module, /*optLevel=*/2, /*numPaths=*/0, /*sharedLibPaths=*/NULL,
-      /*enableObjectDump=*/false);
-
-  if (mlirExecutionEngineIsNull(jit)) {
-    fprintf(stderr, "Execution engine creation failed");
-    exit(2);
-  }
-
-  // Register callback symbol before initialization
-  mlirExecutionEngineRegisterSymbol(
-      jit, mlirStringRefCreateFromCString("_mlir_ciface_init_callback"),
-      (void *)(uintptr_t)initCallback);
-
-  mlirExecutionEngineInitialize(jit);
-
-  // CHECK: Init count: 1
-  printf("Init count: %d\n", initCnt);
-
-  mlirExecutionEngineDestroy(jit);
-  mlirModuleDestroy(module);
-  mlirContextDestroy(ctx);
-}
-
 int main(void) {
 
 #define _STRINGIFY(x) #x
@@ -201,6 +147,5 @@ int main(void) {
 
   TEST(testSimpleExecution);
   TEST(testOmpCreation);
-  TEST(testGlobalCtorJitCallback);
   return 0;
 }

>From 637e13faeb9ca32d1072ccd8acace35d398296fe Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 09:32:29 +0800
Subject: [PATCH 06/12] better test naming

---
 mlir/unittests/ExecutionEngine/Invoke.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 566c587957992..63ff6f44ed8a9 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -327,11 +327,11 @@ static int initCnt = 0;
 static void initCallback() { initCnt += 1; }
 
 #if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer)
-#define MAYBE_JITCallback DISABLED_JITCallback
+#define MAYBE_JITCallbackInGlobalCtor DISABLED_JITCallbackInGlobalCtor
 #else
-#define MAYBE_JITCallback SKIP_WITHOUT_JIT(JITCallback)
+#define MAYBE_JITCallbackInGlobalCtor SKIP_WITHOUT_JIT(JITCallbackInGlobalCtor)
 #endif
-TEST(GlobalCtorJit, MAYBE_JITCallback) {
+TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) {
   std::string moduleStr = R"mlir(
   llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
   llvm.func @ctor() {

>From 25db89b31c4d5abb924d4db4c1d8e3b8e1a7a858 Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 10:09:45 +0800
Subject: [PATCH 07/12] skip asserts for AArch64

---
 mlir/unittests/ExecutionEngine/Invoke.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 63ff6f44ed8a9..8d5f99ec71c13 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -352,6 +352,8 @@ TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) {
   ExecutionEngineOptions jitOptions;
   auto jitOrError = ExecutionEngine::create(*module, jitOptions);
   ASSERT_TRUE(!!jitOrError);
+  // validate initialization is not run on construction
+  ASSERT_EQ(initCnt, 0);
   auto jit = std::move(jitOrError.get());
   // Define any extra symbols so they're available at initialization.
   jit->registerSymbols([&](llvm::orc::MangleAndInterner interner) {
@@ -362,7 +364,17 @@ TEST(MLIRExecutionEngine, MAYBE_JITCallbackInGlobalCtor) {
     return symbolMap;
   });
   jit->initialize();
-  ASSERT_EQ(initCnt, 1);
+  // TODO: Allow JIT initialize for AArch64. Currently there's a bug causing a
+  // crash for AArch64 see related issue #71963.
+  auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost();
+  ASSERT_TRUE(!!tmBuilderOrError);
+  if (!tmBuilderOrError->getTargetTriple().isAArch64()) {
+    // validate the side effect of initialization
+    ASSERT_EQ(initCnt, 1);
+    // next initialization should be noop
+    jit->initialize();
+    ASSERT_EQ(initCnt, 1);
+  }
 }
 
 #endif // _WIN32

>From 413551072e782e9f3157482f381fc37f359054ca Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Thu, 14 Aug 2025 10:25:44 +0800
Subject: [PATCH 08/12] add more sanitizers to skip

---
 mlir/unittests/ExecutionEngine/Invoke.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 8d5f99ec71c13..f827ec9b77066 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -326,7 +326,9 @@ static int initCnt = 0;
 // A helper function that will be called during the JIT's initialization.
 static void initCallback() { initCnt += 1; }
 
-#if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer)
+#if __has_feature(memory_sanitizer) || __has_feature(address_sanitizer) ||     \
+    __has_feature(hwaddress_sanitizer) ||                                      \
+    __has_feature(undefined_behavior_sanitizer)
 #define MAYBE_JITCallbackInGlobalCtor DISABLED_JITCallbackInGlobalCtor
 #else
 #define MAYBE_JITCallbackInGlobalCtor SKIP_WITHOUT_JIT(JITCallbackInGlobalCtor)

>From 03a62ac8b779726dbc397ea81ccc848957c54710 Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Fri, 15 Aug 2025 17:19:23 +0800
Subject: [PATCH 09/12] fix typo

---
 mlir/docs/Dialects/GPU.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index a06183ad39dce..c74c8cb3f036e 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -193,7 +193,7 @@ llvm.func @foo() {
 // mlir-translate --mlir-to-llvmir:
 @binary_bin_cst = internal constant [6 x i8] c"AMDGPU", align 8
 @binary_func_kernel_name = private unnamed_addr constant [7 x i8] c"func\00", align 1
-@binary_@module = internal global ptr null
+@binary_module = internal global ptr null
 @llvm.global_ctors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_load, ptr null}]
 @llvm.global_dtors = appending global [1 x {i32, ptr, ptr}] [{i32 123, ptr @binary_unload, ptr null}]
 define internal void @binary_load() section ".text.startup" {

>From b5a4244d598ee6e1ac4ad6b45a410f156d35a804 Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Fri, 15 Aug 2025 17:43:15 +0800
Subject: [PATCH 10/12] py version test

---
 .../Bindings/Python/ExecutionEngineModule.cpp |  2 +-
 .../mlir/_mlir_libs/_mlirExecutionEngine.pyi  |  1 +
 mlir/test/python/execution_engine.py          | 37 +++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
index 4f7a4a628e246..4885d62c56e6e 100644
--- a/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
+++ b/mlir/lib/Bindings/Python/ExecutionEngineModule.cpp
@@ -133,7 +133,7 @@ NB_MODULE(_mlirExecutionEngine, m) {
           "`llvm.mlir.global_ctors` will be run. One common scenario is that "
           "kernel binary compiled from `gpu.module` gets loaded during "
           "initialization. Make sure all symbols are resolvable before "
-          "initialization by calling `raw_register_runtime` or including "
+          "initialization by calling `register_runtime` or including "
           "shared libraries.")
       .def(
           "dump_to_object_file",
diff --git a/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi b/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi
index 58d453d2b2d37..4b82c78489295 100644
--- a/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi
+++ b/mlir/python/mlir/_mlir_libs/_mlirExecutionEngine.pyi
@@ -19,5 +19,6 @@ class ExecutionEngine:
     def dump_to_object_file(self, file_name: str) -> None: ...
     def raw_lookup(self, func_name: str) -> int: ...
     def raw_register_runtime(self, name: str, callback: object) -> None: ...
+    def init() -> None: ...
     @property
     def _CAPIPtr(self) -> object: ...
diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py
index d569fcef32bfd..6e8a422b12d29 100644
--- a/mlir/test/python/execution_engine.py
+++ b/mlir/test/python/execution_engine.py
@@ -339,6 +339,43 @@ def callback(a):
 
 run(testUnrankedMemRefWithOffsetCallback)
 
+# Test JIT callback in global constructor
+# CHECK-LABEL: TEST: testJITCallbackInGlobalCtor
+def testJITCallbackInGlobalCtor():
+    init_cnt = 0
+
+    @ctypes.CFUNCTYPE(None)
+    def initCallback():
+        nonlocal init_cnt
+        init_cnt += 1
+
+    with Context():
+        module = Module.parse(r"""
+llvm.mlir.global_ctors ctors = [@ctor], priorities = [0 : i32], data = [#llvm.zero]
+llvm.func @ctor() {
+  func.call @init_callback() : () -> ()
+  llvm.return
+}
+func.func private @init_callback() attributes { llvm.emit_c_interface }
+        """)
+
+        # Setup execution engine
+        execution_engine = ExecutionEngine(lowerToLLVM(module))
+
+        # Validate initialization hasn't run yet
+        assert init_cnt == 0
+
+        # # Register callback
+        execution_engine.register_runtime("init_callback", initCallback)
+
+        # # Initialize and verify
+        execution_engine.initialize()
+        assert init_cnt == 1
+        # # Second initialization should be no-op
+        execution_engine.initialize()
+        assert init_cnt == 1
+
+run(testJITCallbackInGlobalCtor)
 
 #  Test addition of two memrefs.
 # CHECK-LABEL: TEST: testMemrefAdd

>From acf71c4d3f0caccddf371bbc97f225c4466ef34c Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Fri, 15 Aug 2025 17:48:31 +0800
Subject: [PATCH 11/12] doc @binary_unload()

---
 mlir/docs/Dialects/GPU.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md
index c74c8cb3f036e..8d4d2ca3e5743 100644
--- a/mlir/docs/Dialects/GPU.md
+++ b/mlir/docs/Dialects/GPU.md
@@ -199,7 +199,13 @@ llvm.func @foo() {
 define internal void @binary_load() section ".text.startup" {
 entry:
   %0 = call ptr @mgpuModuleLoad(ptr @binary_bin_cst)
-  store ptr %0, ptr @module
+  store ptr %0, ptr @binary_module
+  ...
+}
+define internal void @binary_unload() section ".text.startup" {
+entry:
+  %0 = load ptr, ptr @binary_module, align 8
+  call void @mgpuModuleUnload(ptr %0)
   ...
 }
 ...

>From 5dd9b203dfac58d1c388673c910e5b8defa869af Mon Sep 17 00:00:00 2001
From: jackalcooper <jackalcooper at gmail.com>
Date: Fri, 15 Aug 2025 18:02:34 +0800
Subject: [PATCH 12/12] fmt

---
 mlir/test/python/execution_engine.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py
index 6e8a422b12d29..e67863f5b3694 100644
--- a/mlir/test/python/execution_engine.py
+++ b/mlir/test/python/execution_engine.py
@@ -382,17 +382,18 @@ def initCallback():
 def testMemrefAdd():
     with Context():
         module = Module.parse(
-            """
-    module  {
-      func.func @main(%arg0: memref<1xf32>, %arg1: memref<f32>, %arg2: memref<1xf32>) attributes { llvm.emit_c_interface } {
-        %0 = arith.constant 0 : index
-        %1 = memref.load %arg0[%0] : memref<1xf32>
-        %2 = memref.load %arg1[] : memref<f32>
-        %3 = arith.addf %1, %2 : f32
-        memref.store %3, %arg2[%0] : memref<1xf32>
-        return
-      }
-    } """
+            r"""
+module  {
+    func.func @main(%arg0: memref<1xf32>, %arg1: memref<f32>, %arg2: memref<1xf32>) attributes { llvm.emit_c_interface } {
+    %0 = arith.constant 0 : index
+    %1 = memref.load %arg0[%0] : memref<1xf32>
+    %2 = memref.load %arg1[] : memref<f32>
+    %3 = arith.addf %1, %2 : f32
+    memref.store %3, %arg2[%0] : memref<1xf32>
+    return
+    }
+}
+        """
         )
         arg1 = np.array([32.5]).astype(np.float32)
         arg2 = np.array(6).astype(np.float32)



More information about the Mlir-commits mailing list