[Mlir-commits] [mlir] [NVGPU] Fix nvdsl examples (PR #156830)

Thu Oct 2 02:33:14 PDT 2025

https://github.com/castigli updated https://github.com/llvm/llvm-project/pull/156830

>From d0ae04d915eca2533d7719838e73d8f8e3ebd4b3 Mon Sep 17 00:00:00 2001
From: Giacomo Castiglioni <giacastiglioni at gmail.com>
Date: Tue, 2 Sep 2025 14:56:12 +0200
Subject: [PATCH] fix nvdsl

---
 mlir/test/Examples/NVGPU/Ch5.py                 | 2 +-
 mlir/test/Examples/NVGPU/tools/nvdsl.py         | 7 +++----
 mlir/test/Examples/NVGPU/tools/nvgpucompiler.py | 4 +++-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mlir/test/Examples/NVGPU/Ch5.py b/mlir/test/Examples/NVGPU/Ch5.py
index f98cfd758a75f..91c346c837dda 100644
--- a/mlir/test/Examples/NVGPU/Ch5.py
+++ b/mlir/test/Examples/NVGPU/Ch5.py
@@ -156,7 +156,7 @@ def producer_loop(
 ):
     phase = const(True, ty=T.bool())
 
-    for iv, phase in scf.for_(0, (K // TILE_K), 1, [phase]):
+    for iv, phase, _ in scf.for_(0, (K // TILE_K), 1, [phase]):
         stage = iv % num_stages
         # Wait MMA to be done
         mbar_mma[stage].try_wait(phase)
diff --git a/mlir/test/Examples/NVGPU/tools/nvdsl.py b/mlir/test/Examples/NVGPU/tools/nvdsl.py
index 90dbb2355e1c8..d4c50fc9bc28d 100644
--- a/mlir/test/Examples/NVGPU/tools/nvdsl.py
+++ b/mlir/test/Examples/NVGPU/tools/nvdsl.py
@@ -84,8 +84,7 @@ def arrive(self, txcount: int = 0, predicate=None):
                 self.mbar_group_op, txcount_op, self.id_op, predicate=predicate
             )
         else:
-            nvgpu.mbarrier_arrive(
-                ir.Type.parse("!nvgpu.mbarrier.token"), self.mbar_group_op, self.id_op
+            nvgpu.mbarrier_arrive(self.mbar_group_op, self.id_op
             )
 
     def try_wait(self, phase: bool = False, ticks: int = 10000000):
@@ -144,7 +143,7 @@ def create_descriptor(self, device_ptr):
             device_ptr,
         )
         self.tma_descriptor = nvgpu.TmaCreateDescriptorOp(
-            tma_descriptor_ty, device_unranked_memref, map(const, self.tma_box_shape)
+            tma_descriptor_ty, device_unranked_memref, list(map(const, self.tma_box_shape))
         )
         return self.tma_descriptor.result
 
@@ -156,7 +155,7 @@ def load(self, dest, mbarrier: Mbarriers, coords=[0], predicate=None):
             dest,
             mbarrier.mbar_group_op,
             self.tma_descriptor,
-            coordinates=map(const, coords),
+            coordinates=list(map(const, coords)),
             mbarId=mbarrier.id_op,
             predicate=predicate,
         )
diff --git a/mlir/test/Examples/NVGPU/tools/nvgpucompiler.py b/mlir/test/Examples/NVGPU/tools/nvgpucompiler.py
index 1c9cc74fcd169..4b661f8df6a9f 100644
--- a/mlir/test/Examples/NVGPU/tools/nvgpucompiler.py
+++ b/mlir/test/Examples/NVGPU/tools/nvgpucompiler.py
@@ -35,9 +35,11 @@ def compile(self, module: ir.Module):
 
     def jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
         """Wraps the module in a JIT execution engine."""
-        return execution_engine.ExecutionEngine(
+        ee = execution_engine.ExecutionEngine(
             module, opt_level=self.opt_level, shared_libs=self.shared_libs
         )
+        ee.initialize()
+        return ee
 
     def compile_and_jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
         """Compiles and jits the module."""