[Mlir-commits] [mlir] 26ffca0 - [mlir][gpu]add AffineScope to gpu.func op. (#118010)

llvmlistbot at llvm.org
Fri Nov 29 02:10:03 PST 2024


Author: lonely eagle
Date: 2024-11-29T11:10:00+01:00
New Revision: 26ffca08439f23e8db43beeb021c4cae32716822

URL: https://github.com/llvm/llvm-project/commit/26ffca08439f23e8db43beeb021c4cae32716822
DIFF: https://github.com/llvm/llvm-project/commit/26ffca08439f23e8db43beeb021c4cae32716822.diff

LOG: [mlir][gpu]add AffineScope to gpu.func op. (#118010)

This PR addresses the problem described in
https://github.com/llvm/llvm-project/pull/117721.
To implement the thread-to-data mapping relationship (data or thread layout)
efficiently, I added the AffineScope trait to gpu.func.
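With the AffineScope trait on gpu.func, values defined at the top level of the
function body (such as a memref.dim result) become valid affine symbols and can
be used in affine.for bounds. A minimal sketch of what this enables, modeled on
the test added below (the module, kernel name, and map here are illustrative):

  #map = affine_map<()[s0] -> (s0)>

  module attributes {gpu.container_module} {
    gpu.module @kernels {
      gpu.func @example(%arg0: memref<?x?xf32>) kernel {
        %c1 = arith.constant 1 : index
        // %dim is defined at the top level of an AffineScope region, so it is
        // a valid symbol operand for the affine.for upper bound below.
        %dim = memref.dim %arg0, %c1 : memref<?x?xf32>
        affine.for %i = 0 to #map()[%dim] step 32 {
        }
        gpu.return
      }
    }
  }

Without the trait, %dim would not qualify as an affine symbol inside gpu.func
(which is IsolatedFromAbove), which is the issue the linked PR runs into.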

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
    mlir/test/Dialect/Affine/ops.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5b1d7bb87a219a..d08e7ceb9e6c69 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -353,7 +353,7 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
 
 def GPU_GPUFuncOp : GPU_Op<"func", [
     HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
-    IsolatedFromAbove
+    IsolatedFromAbove, AffineScope
   ]> {
   let summary = "Function executable on a GPU";
 

diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 1d1db5f58f54cb..c6bfb688db1c1d 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -298,3 +298,29 @@ func.func @linearize_mixed(%index0: index, %index1: index, %index2: index, %basi
   %1 = affine.linearize_index disjoint [%index0, %index1, %index2] by (2, %basis1, 3) : index
   return %1 : index
 }
+
+// -----
+
+#map = affine_map<()[s0] -> (s0)>
+
+// CHECK-LABEL: @gpu_affine_for
+
+module attributes {gpu.container_module} {
+  gpu.module @gpu {
+    gpu.func @gpu_affine_for(%arg0: memref<?x?xf32>) kernel {
+      %c3 = arith.constant 1 : index
+      %dim = memref.dim %arg0, %c3 : memref<?x?xf32>
+      %c0 = arith.constant 0 : index
+      affine.for %arg3 = %c0 to #map()[%dim] step 32 {
+      }
+      gpu.return
+    }
+  }
+}
+// CHECK-SAME:        (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
+// CHECK:             %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:             %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
+// CHECK:             %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:             affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
+// CHECK:             }
+// CHECK:             gpu.return
