[Mlir-commits] [mlir] [mlir][python] Add bindings for OpenACC dialect (PR #163620)

Razvan Lupusoru llvmlistbot at llvm.org
Wed Oct 15 13:50:19 PDT 2025


================
@@ -0,0 +1,138 @@
+# RUN: python %s | FileCheck %s
+from mlir.ir import (
+    Context,
+    FunctionType,
+    Location,
+    Module,
+    InsertionPoint,
+    IntegerType,
+    IndexType,
+    MemRefType,
+    F32Type,
+    Block,
+    ArrayAttr,
+    Attribute,
+    UnitAttr,
+    StringAttr,
+    DenseI32ArrayAttr,
+    ShapedType,
+)
+from mlir.dialects import openacc, func, arith, memref
+
+
+def run(f):
+    print("\n// TEST:", f.__name__)
+    with Context(), Location.unknown():
+        f()
+    return f
+
+
+@run
+def testManualReconstructedKernel():
+    module = Module.create()
+
+    # Add required module attributes
+    module.operation.attributes["dlti.dl_spec"] = Attribute.parse("#dlti.dl_spec<>")
+    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
+
+    i32 = IntegerType.get_signless(32)
+    i64 = IntegerType.get_signless(64)
+    f32 = F32Type.get()
+    dynamic = ShapedType.get_dynamic_size()
+    memref_f32_1d_any = MemRefType.get([dynamic], f32)
+
+    with InsertionPoint(module.body):
+        function_type = FunctionType.get(
+            [memref_f32_1d_any, memref_f32_1d_any, i64], []
+        )
+        f = func.FuncOp(
+            type=function_type,
+            name="memcpy_idiom",
+        )
+        f.attributes["sym_visibility"] = StringAttr.get("public")
+
+    with InsertionPoint(f.add_entry_block()):
+        c1024 = arith.ConstantOp(i32, 1024)
+        c128 = arith.ConstantOp(i32, 128)
+
+        parallel_op = openacc.ParallelOp(
+            asyncOperands=[],
+            waitOperands=[],
+            numGangs=[c1024],
+            numWorkers=[],
+            vectorLength=[c128],
+            reductionOperands=[],
+            privateOperands=[],
+            firstprivateOperands=[],
+            dataClauseOperands=[],
+        )
+
+        # Set required device_type and segment attributes to satisfy verifier
+        acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+        parallel_op.numGangsDeviceType = acc_device_none
+        parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1])
+        parallel_op.vectorLengthDeviceType = acc_device_none
+
+        parallel_block = Block.create_at_start(parent=parallel_op.region, arg_types=[])
+
+        with InsertionPoint(parallel_block):
+            c0 = arith.ConstantOp(i64, 0)
+            c1 = arith.ConstantOp(i64, 1)
+
+            loop_op = openacc.LoopOp(
+                results_=[],
+                lowerbound=[c0],
+                upperbound=[f.arguments[2]],
+                step=[c1],
+                gangOperands=[],
+                workerNumOperands=[],
+                vectorOperands=[],
+                tileOperands=[],
+                cacheOperands=[],
+                privateOperands=[],
+                reductionOperands=[],
+                firstprivateOperands=[],
+            )
+
+            # Set loop attributes: gang and independent on device_type<none>
+            acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+            loop_op.gang = acc_device_none
+            loop_op.independent = acc_device_none
+
+            loop_block = Block.create_at_start(parent=loop_op.region, arg_types=[i64])
+
+            with InsertionPoint(loop_block):
+                idx0 = arith.index_cast(
+                    out=IndexType.get(), in_=loop_block.arguments[0]
+                )
+                val = memref.load(memref=f.arguments[1], indices=[idx0])
+                idx1 = arith.index_cast(
+                    out=IndexType.get(), in_=loop_block.arguments[0]
+                )
+                memref.store(value=val, memref=f.arguments[0], indices=[idx1])
+                openacc.YieldOp([])
+
+            openacc.YieldOp([])
+
+        func.ReturnOp([])
+
+    print(module)
+
+    # CHECK-LABEL:   func.func public @memcpy_idiom
----------------
razvanlupusoru wrote:

To make the example even more realistic, I would suggest using acc.copyin for input, acc.create for output, and acc.copyout for updating the host copy after acc.parallel. This would make your example even more thorough in terms of ops coverage.

https://github.com/llvm/llvm-project/pull/163620


More information about the Mlir-commits mailing list