[Mlir-commits] [mlir] [mlir][python] Add bindings for OpenACC dialect (PR #163620)

Wed Oct 15 16:05:47 PDT 2025

================
@@ -0,0 +1,138 @@
+# RUN: python %s | FileCheck %s
+from mlir.ir import (
+    Context,
+    FunctionType,
+    Location,
+    Module,
+    InsertionPoint,
+    IntegerType,
+    IndexType,
+    MemRefType,
+    F32Type,
+    Block,
+    ArrayAttr,
+    Attribute,
+    UnitAttr,
+    StringAttr,
+    DenseI32ArrayAttr,
+    ShapedType,
+)
+from mlir.dialects import openacc, func, arith, memref
+
+
+# Test driver: prints a "// TEST: <function name>" line, then immediately
+# calls `f` with a newly created Context and an unknown Location active as
+# context managers. Returns `f` unchanged so the helper can be applied as a
+# decorator and the decorated name stays bound to the original function.
+def run(f):
+    print("\n// TEST:", f.__name__)
+    with Context(), Location.unknown():
+        f()
+    return f
+
+
+@run
+def testManualReconstructedKernel():
+    module = Module.create()
+
+    # Add required module attributes
+    module.operation.attributes["dlti.dl_spec"] = Attribute.parse("#dlti.dl_spec<>")
+    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
+
+    i32 = IntegerType.get_signless(32)
+    i64 = IntegerType.get_signless(64)
+    f32 = F32Type.get()
+    dynamic = ShapedType.get_dynamic_size()
+    memref_f32_1d_any = MemRefType.get([dynamic], f32)
+
+    with InsertionPoint(module.body):
+        function_type = FunctionType.get(
+            [memref_f32_1d_any, memref_f32_1d_any, i64], []
+        )
+        f = func.FuncOp(
+            type=function_type,
+            name="memcpy_idiom",
+        )
+        f.attributes["sym_visibility"] = StringAttr.get("public")
+
+    with InsertionPoint(f.add_entry_block()):
+        c1024 = arith.ConstantOp(i32, 1024)
+        c128 = arith.ConstantOp(i32, 128)
+
+        parallel_op = openacc.ParallelOp(
+            asyncOperands=[],
+            waitOperands=[],
+            numGangs=[c1024],
+            numWorkers=[],
+            vectorLength=[c128],
+            reductionOperands=[],
+            privateOperands=[],
+            firstprivateOperands=[],
+            dataClauseOperands=[],
+        )
+
+        # Set required device_type and segment attributes to satisfy verifier
+        acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+        parallel_op.numGangsDeviceType = acc_device_none
+        parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1])
+        parallel_op.vectorLengthDeviceType = acc_device_none
+
+        parallel_block = Block.create_at_start(parent=parallel_op.region, arg_types=[])
+
+        with InsertionPoint(parallel_block):
+            c0 = arith.ConstantOp(i64, 0)
+            c1 = arith.ConstantOp(i64, 1)
+
+            loop_op = openacc.LoopOp(
+                results_=[],
+                lowerbound=[c0],
+                upperbound=[f.arguments[2]],
+                step=[c1],
+                gangOperands=[],
+                workerNumOperands=[],
+                vectorOperands=[],
+                tileOperands=[],
+                cacheOperands=[],
+                privateOperands=[],
+                reductionOperands=[],
+                firstprivateOperands=[],
+            )
+
+            # Set loop attributes: gang and independent on device_type<none>
+            acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+            loop_op.gang = acc_device_none
+            loop_op.independent = acc_device_none
+
+            loop_block = Block.create_at_start(parent=loop_op.region, arg_types=[i64])
+
+            with InsertionPoint(loop_block):
+                idx0 = arith.index_cast(
+                    out=IndexType.get(), in_=loop_block.arguments[0]
+                )
+                val = memref.load(memref=f.arguments[1], indices=[idx0])
+                idx1 = arith.index_cast(
+                    out=IndexType.get(), in_=loop_block.arguments[0]
+                )
+                memref.store(value=val, memref=f.arguments[0], indices=[idx1])
+                openacc.YieldOp([])
+
+            openacc.YieldOp([])
+
+        func.ReturnOp([])
+
+    print(module)
+
+    # CHECK-LABEL:   func.func public @memcpy_idiom
----------------
ashermancinelli wrote:

Working on it, thanks for the suggestion!

https://github.com/llvm/llvm-project/pull/163620