[Mlir-commits] [mlir] [mlir][gpu]add AffineScope to gpu.func op. (PR #118010)

lonely eagle llvmlistbot at llvm.org
Thu Nov 28 06:29:05 PST 2024


https://github.com/linuxlonelyeagle created https://github.com/llvm/llvm-project/pull/118010

As the title says, this PR adds the AffineScope trait to the gpu.func op.
It is meant to solve the problem raised in https://github.com/llvm/llvm-project/pull/117721.
To efficiently implement the thread-to-data mapping relationship (data or thread layout), I introduced AffineScope on gpu.func.
I think there's a lot to be gained from it. Feel free to discuss in the comments below. Thanks!
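
To illustrate what the trait enables (this sketch is not part of the patch; the module/kernel names, memref shapes, and thread-count assumption are made up): with AffineScope on gpu.func, values defined at the top level of the function body, such as gpu.thread_id or memref.dim results, become valid affine dims/symbols, so they can feed affine.for bounds and affine.load/store indices directly inside the kernel.

  gpu.module @kernels {
    // Hypothetical example: %tid and %dim are top-level values of the
    // gpu.func affine scope, so they are valid affine symbols here.
    gpu.func @thread_copy(%in: memref<?xf32>, %out: memref<?xf32>) kernel {
      %tid = gpu.thread_id x
      %c0 = arith.constant 0 : index
      %dim = memref.dim %in, %c0 : memref<?xf32>
      // Grid-stride copy, assuming a (hypothetical) block of 128 threads.
      // Without AffineScope on gpu.func, %tid and %dim would not be valid
      // affine operands here unless an enclosing func.func provided the
      // affine scope.
      affine.for %i = %tid to %dim step 128 {
        %v = affine.load %in[%i] : memref<?xf32>
        affine.store %v, %out[%i] : memref<?xf32>
      }
      gpu.return
    }
  }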

>From 7cf85bce9bbdcdde0fbe1e7bcb4b638e536de31e Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Thu, 28 Nov 2024 22:16:32 +0800
Subject: [PATCH] add AffineScope to gpu.func op.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td |  2 +-
 mlir/test/Dialect/Affine/ops.mlir          | 26 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5b1d7bb87a219a..d08e7ceb9e6c69 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -353,7 +353,7 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
 
 def GPU_GPUFuncOp : GPU_Op<"func", [
     HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
-    IsolatedFromAbove
+    IsolatedFromAbove, AffineScope
   ]> {
   let summary = "Function executable on a GPU";
 
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 1d1db5f58f54cb..c2be45f398c45c 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -298,3 +298,29 @@ func.func @linearize_mixed(%index0: index, %index1: index, %index2: index, %basi
   %1 = affine.linearize_index disjoint [%index0, %index1, %index2] by (2, %basis1, 3) : index
   return %1 : index
 }
+
+// -----
+
+#map = affine_map<()[s0] -> (s0)>
+
+// CHECK-LABEL: @gpu_affine_for
+
+module attributes {gpu.container_module} {
+  gpu.module @gpu {
+    gpu.func @gpu_affine_for(%arg0: memref<?x?xf32>) kernel {
+      %c1 = arith.constant 1 : index
+      %dim = memref.dim %arg0, %c1 : memref<?x?xf32>
+      %c0 = arith.constant 0 : index
+      affine.for %arg3 = %c0 to #map()[%dim] step 32 {
+      }
+      gpu.return
+    }
+  }
+}
+// CHECK-SAME:        (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
+// CHECK:             %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:             %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
+// CHECK:             %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:             affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
+// CHECK:             }
+// CHECK:             gpu.return
\ No newline at end of file


