[Mlir-commits] [mlir] [mlir][affine]introduce AffineSymbol trait and use it for using gpu.threadid op in the inner loops. (PR #118478)

Thu Jan 16 19:02:57 PST 2025

https://github.com/linuxlonelyeagle updated https://github.com/llvm/llvm-project/pull/118478

>From cb2e5a8ad1ed186f049d73d0ef63dded5481b9be Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Sat, 11 Jan 2025 16:51:01 +0800
Subject: [PATCH 1/4] Introduce AffineSymbol trait and use it for using
 gpu.threadid op in inner loops.

---
 mlir/docs/Dialects/Affine.md               |  5 +-
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td |  2 +-
 mlir/include/mlir/IR/OpBase.td             |  2 +
 mlir/include/mlir/IR/OpDefinition.h        | 11 +++++
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp   |  4 ++
 mlir/lib/IR/Operation.cpp                  |  8 ++++
 mlir/test/Dialect/Affine/ops.mlir          | 36 ++++++++++++++
 mlir/test/Dialect/GPU/transform-gpu.mlir   | 56 +++++++++++-----------
 8 files changed, 93 insertions(+), 31 deletions(-)

diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md
index bfcbbf5bb3b132..7990fc89e75bd0 100644
--- a/mlir/docs/Dialects/Affine.md
+++ b/mlir/docs/Dialects/Affine.md
@@ -69,10 +69,11 @@ immediately enclosed by the latter),
 3. a value that dominates the `AffineScope` op enclosing the value's
 use,
 4. the result of a constant operation,
-5. the result of an
+5. the result of an `AffineSymbol` op,
+6. the result of an
 [`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as
 arguments any valid symbolic identifiers, or
-6. the result of a
+7. the result of a
 [`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that
 is an argument to a `AffineScope` op or a memref where the corresponding
 dimension is either static or a dynamic one in turn bound to a valid symbol.
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 3adfd5f4f2c436..0c22c9c09c106b 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -223,7 +223,7 @@ def GPU_GridDimOp : GPU_IndexOp<"grid_dim"> {
     There is an implicit upper bound of `kMaxDim` (currently uint32_t::max).
   }];
 }
-def GPU_ThreadIdOp : GPU_IndexOp<"thread_id"> {
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id", [AffineSymbol]> {
   let description = [{
     Returns the thread id, i.e. the index of the current thread within the block
     along the x, y, or z `dimension`.
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 51b60972203e7f..8dd0773c0bd978 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -63,6 +63,8 @@ class PredOpTrait<string descr, Pred pred, list<Trait> traits = []>
 
 // Op defines an affine scope.
 def AffineScope : NativeOpTrait<"AffineScope">;
+// Op defines an affine symbol.
+def AffineSymbol : NativeOpTrait<"AffineSymbol">;
 // Op defines an automatic allocation scope.
 def AutomaticAllocationScope :
   NativeOpTrait<"AutomaticAllocationScope">;
diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index 59f094d6690991..6f44294ae06bb8 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -365,6 +365,7 @@ LogicalResult verifyResultSizeAttr(Operation *op, StringRef sizeAttrName);
 LogicalResult verifyNoRegionArguments(Operation *op);
 LogicalResult verifyElementwise(Operation *op);
 LogicalResult verifyIsIsolatedFromAbove(Operation *op);
+LogicalResult verifyIndexResultType(Operation *op);
 } // namespace impl
 
 /// Helper class for implementing traits.  Clients are not expected to interact
@@ -1268,6 +1269,16 @@ class AffineScope : public TraitBase<ConcreteType, AffineScope> {
   }
 };
 
+/// A trait of operation. Any operation holds the AffineSymbol, and its result
+/// can be used as a symbol.
+template <typename ConcreteType>
+class AffineSymbol : public TraitBase<ConcreteType, AffineSymbol> {
+public:
+  static LogicalResult verifyTrait(Operation *op) {
+    return impl::verifyIndexResultType(op);
+  }
+};
+
 /// A trait of region holding operations that define a new scope for automatic
 /// allocations, i.e., allocations that are freed when control is transferred
 /// back from the operation's region. Any operations performing such allocations
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index b45829bcf6d2ca..c99bd28cf85ba6 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -443,6 +443,10 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
   if (matchPattern(defOp, m_Constant(&operandCst)))
     return true;
 
+  // AffineScope Op.
+  if (defOp->hasTrait<OpTrait::AffineSymbol>())
+    return true;
+
   // Affine apply operation is ok if all of its operands are ok.
   if (auto applyOp = dyn_cast<AffineApplyOp>(defOp))
     return applyOp.isValidSymbol(region);
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index fe0fee0f8db2ce..5246eebf2e67bb 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -1390,6 +1390,14 @@ LogicalResult OpTrait::impl::verifyIsIsolatedFromAbove(Operation *isolatedOp) {
   return success();
 }
 
+LogicalResult OpTrait::impl::verifyIndexResultType(Operation *op) {
+  if (op->getNumResults() != 1)
+    op->emitError("operation's result number should be 1.");
+  if (!mlir::isa<IndexType>(op->getResult(0).getType()))
+    op->emitError("operation's result type should be index.");
+  return success();
+}
+
 bool OpTrait::hasElementwiseMappableTraits(Operation *op) {
   return op->hasTrait<Elementwise>() && op->hasTrait<Scalarizable>() &&
          op->hasTrait<Vectorizable>() && op->hasTrait<Tensorizable>();
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index c6bfb688db1c1d..5bd556619f3d5a 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -324,3 +324,39 @@ module attributes {gpu.container_module} {
 // CHECK:             affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
 // CHECK:             }
 // CHECK:             gpu.return
+
+// -----
+
+#map = affine_map<()[s0] -> (s0 mod 32)>
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>
+
+module {
+  gpu.module @gpu {
+    gpu.func @affine_thread_id(%arg0: memref<?x?xf32>) kernel {
+      %c3 = arith.constant 3 : index
+      %dim = memref.dim %arg0, %c3 : memref<?x?xf32>
+      %c0 = arith.constant 0 : index
+      affine.for %arg3 = %c0 to %dim step 32 {
+        %thread_id_x = gpu.thread_id  x
+        %0 = affine.apply #map()[%thread_id_x]
+        %c128 = arith.constant 128 : index
+        affine.for %arg4 = %0 to %c128 step 8 {
+          %c32 = arith.constant 32 : index
+        }
+      }
+      gpu.return
+    }
+  }
+}
+
+// CHECK-LABEL:     @affine_thread_id
+// CHECK-SAME:        (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
+// CHECK:             %[[VAL_1:.*]] = arith.constant 3 : index
+// CHECK:             %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
+// CHECK:             %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:             affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
+// CHECK:               %[[VAL_5:.*]] = gpu.thread_id  x
+// CHECK:               %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
+// CHECK:               %[[VAL_7:.*]] = arith.constant 128 : index
+// CHECK:               affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {
diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir
index 72572c6a38de12..6018eb40bac2a8 100644
--- a/mlir/test/Dialect/GPU/transform-gpu.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu.mlir
@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
+// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)> 
 
 // CHECK-LABEL: func.func @warpgroup_3d(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
 //      CHECK:   gpu.launch
 //      CHECK:   %[[TIDX:.*]] = gpu.thread_id  x
 //      CHECK:   %[[TIDY:.*]] = gpu.thread_id  y
-//  CHECK-DAG:   %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
+//  CHECK-DAG:   %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
 //  CHECK-DAG:   %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
 //  CHECK-DAG:   %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
 //      CHECK:   %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
+// CHECK-DAG: #map = affine_map<()[s0] -> (s0 floordiv 16)>
 
 // CHECK-LABEL: func.func @warp_3d(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
 //      CHECK:   gpu.launch
 //      CHECK:   %[[TIDX:.*]] = gpu.thread_id  x
 //      CHECK:   %[[TIDY:.*]] = gpu.thread_id  y
-//  CHECK-DAG:   %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
+//  CHECK-DAG:   %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
 //  CHECK-DAG:   %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
 //  CHECK-DAG:   %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
 //      CHECK:   %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
-// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
-// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
+// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
+// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
+// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>
 
 // CHECK-LABEL: func.func @warpgroup_linear(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
 // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id  x
 // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id  y
 // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id  z
-// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
-// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]]
+// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
 //     CHECK: scf.if %[[CMPLIN]]
 //      CHECK:   memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
-// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
-// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
+// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
+// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
+// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>
 
 // CHECK-LABEL: func.func @warp_linear(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
 // CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id  x
 // CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id  y
 // CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id  z
-// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
-// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
+// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
 //     CHECK: scf.if %[[CMPLIN]]
 //      CHECK:   memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
-// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
+// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
+// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>
 
-// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
-// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
-// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
+// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
+// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
+// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>
 
 // CHECK-LABEL: func.func @map_multi_level_linear(
 func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
       memref.store %6, %y[%i, %j] : !type
     }  { mapping = [#gpu.thread<y>, #gpu.thread<x>]}
 
-    // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
+    // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]]
     // CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
     //     CHECK: scf.if %[[CMPLIN]]
     scf.forall (%i, %j, %k) in (%c3, %c2, %c1) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
         memref.store %8, %y[%i, %j] : !type
      }  {mapping = [#gpu.warp<linear_dim_0>, #gpu.warp<linear_dim_1>, #gpu.warp<linear_dim_2>] }
 
-    // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
-    // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
+    // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]]
+    // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]]
     // CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
     //     CHECK: scf.if %[[COND]]
     //     CHECK:   memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -648,7 +648,7 @@ module attributes {transform.with_named_sequence} {
 #map1 = affine_map<(d0) -> (d0 * 32)>
 
 // CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
-// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
+// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
 
 // CHECK-LABEL: func.func @simple_fill(
 func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
@@ -667,7 +667,7 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
 //       CHECK:     %[[TIDX:.*]] = gpu.thread_id  x
 //       CHECK:     %[[TIDY:.*]] = gpu.thread_id  y
 //       CHECK:     %[[TIDZ:.*]] = gpu.thread_id  z
-//       CHECK:     %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
+//       CHECK:     %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
 //   CHECK-NOT:     scf.if
 //       CHECK:       memref.subview %{{.*}}[%[[THX]]]
       %1 = affine.apply #map1(%arg2)

>From 7e5a95b4497a172815df0576260fe00660d2eab9 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Sun, 12 Jan 2025 17:06:51 +0800
Subject: [PATCH 2/4] imporve doc, test, change trait check.

---
 mlir/docs/Dialects/Affine.md             |  2 +-
 mlir/docs/Traits/_index.md               | 10 ++++++++++
 mlir/include/mlir/IR/OpDefinition.h      |  6 ++++--
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp |  3 ++-
 mlir/lib/IR/Operation.cpp                | 15 +++++++++++----
 mlir/test/Dialect/Affine/ops.mlir        | 21 +++++++++++----------
 6 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md
index 7990fc89e75bd0..b3e3d5a15b93ad 100644
--- a/mlir/docs/Dialects/Affine.md
+++ b/mlir/docs/Dialects/Affine.md
@@ -69,7 +69,7 @@ immediately enclosed by the latter),
 3. a value that dominates the `AffineScope` op enclosing the value's
 use,
 4. the result of a constant operation,
-5. the result of an `AffineSymbol` op,
+5. the result of an op with [`AffineSymbol`](Traits.md#AffineSymbol) trait,
 6. the result of an
 [`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as
 arguments any valid symbolic identifiers, or
diff --git a/mlir/docs/Traits/_index.md b/mlir/docs/Traits/_index.md
index 3fa24ec77107f0..4aaf50039b0682 100644
--- a/mlir/docs/Traits/_index.md
+++ b/mlir/docs/Traits/_index.md
@@ -223,6 +223,16 @@ affine.load, and affine.store. The polyhedral scope defined by an operation with
 this trait includes all operations in its region excluding operations that are
 nested inside of other operations that themselves have this trait.
 
+*   `OpTrait::AffineSymbol` -- `AffineSymbol`
+
+This trait will make the results of the operation that holds it become valid symbols.
+It complements the AffineScope trait, allowing SSA values in the region of operation
+to be used as operands for various Affine dialect operations depending on the AffineScope
+trait, without altering the properties of the operation. In contrast, the AffineSymbol
+trait directly changes the properties of the operations. Additionally, the AffineScope
+trait cannot make SSA values in the nested region of operations valid symbols, operations
+can hold the AffineSymbol trait, then their results whill be vaild symbols.
+
 ### AutomaticAllocationScope
 
 *   `OpTrait::AutomaticAllocationScope` -- `AutomaticAllocationScope`
diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index 6f44294ae06bb8..0d57d4806fcde5 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -1269,8 +1269,10 @@ class AffineScope : public TraitBase<ConcreteType, AffineScope> {
   }
 };
 
-/// A trait of operation. Any operation holds the AffineSymbol, and its result
-/// can be used as a symbol.
+/// A trait of operation. The results of an operation with the AffineSymbol
+/// trait can be used as symbols for the purposes for affine dialect purposes.
+/// This trait will make the results of the operation that holds it become valid
+/// symbols. For more details, see `Traits.md#AffineSymbol`.
 template <typename ConcreteType>
 class AffineSymbol : public TraitBase<ConcreteType, AffineSymbol> {
 public:
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index c99bd28cf85ba6..0cc6244c46dd9d 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -410,6 +410,7 @@ bool mlir::affine::isValidSymbol(Value value) {
 /// A value can be used as a symbol for `region` iff it meets one of the
 /// following conditions:
 /// *) It is a constant.
+/// *) It is a result, its defineOp is holded by `AffineSymbol` Trait.
 /// *) It is the result of an affine apply operation with symbol arguments.
 /// *) It is a result of the dim op on a memref whose corresponding size is
 ///    a valid symbol.
@@ -443,7 +444,7 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
   if (matchPattern(defOp, m_Constant(&operandCst)))
     return true;
 
-  // AffineScope Op.
+  // AffineSymbol Op.
   if (defOp->hasTrait<OpTrait::AffineSymbol>())
     return true;
 
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index 5246eebf2e67bb..78b902035bfb42 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -1391,10 +1391,17 @@ LogicalResult OpTrait::impl::verifyIsIsolatedFromAbove(Operation *isolatedOp) {
 }
 
 LogicalResult OpTrait::impl::verifyIndexResultType(Operation *op) {
-  if (op->getNumResults() != 1)
-    op->emitError("operation's result number should be 1.");
-  if (!mlir::isa<IndexType>(op->getResult(0).getType()))
-    op->emitError("operation's result type should be index.");
+  ResultRange results = op->getResults();
+  if (results.empty()) {
+    op->emitError("operation must have at least one result");
+    return failure();
+  }
+  for (OpResult result : results) {
+    if (!mlir::isa<IndexType>(result.getType())) {
+      op->emitError("operation's result type should be index");
+      return failure();
+    }
+  }
   return success();
 }
 
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 5bd556619f3d5a..95d81f8519d480 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -331,6 +331,8 @@ module attributes {gpu.container_module} {
 
 // CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>
 
+// CHECK-LABEL: @affine_thread_id
+
 module {
   gpu.module @gpu {
     gpu.func @affine_thread_id(%arg0: memref<?x?xf32>) kernel {
@@ -350,13 +352,12 @@ module {
   }
 }
 
-// CHECK-LABEL:     @affine_thread_id
-// CHECK-SAME:        (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
-// CHECK:             %[[VAL_1:.*]] = arith.constant 3 : index
-// CHECK:             %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
-// CHECK:             %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK:             affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
-// CHECK:               %[[VAL_5:.*]] = gpu.thread_id  x
-// CHECK:               %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
-// CHECK:               %[[VAL_7:.*]] = arith.constant 128 : index
-// CHECK:               affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {
+// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
+// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
+// CHECK: %[[VAL_5:.*]] = gpu.thread_id  x
+// CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
+// CHECK: %[[VAL_7:.*]] = arith.constant 128 : index
+// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {

>From c8551ce986c45d7d038d3f5871d7263e9e84d63c Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Thu, 16 Jan 2025 14:39:19 +0800
Subject: [PATCH 3/4] the result of a Pure operation that its operands are the
 valid symbolic identifiers.

---
 mlir/docs/Dialects/Affine.md                  |  7 +--
 mlir/docs/Traits/_index.md                    | 10 -----
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td    |  2 +-
 mlir/include/mlir/IR/OpBase.td                |  2 -
 mlir/include/mlir/IR/OpDefinition.h           | 13 ------
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp      | 20 +++++----
 mlir/lib/IR/Operation.cpp                     | 15 -------
 .../SuperVectorize/vectorize_reduction.mlir   |  6 +--
 mlir/test/Dialect/Affine/invalid.mlir         | 44 -------------------
 mlir/test/Dialect/GPU/transform-gpu.mlir      | 24 +++++-----
 .../Linalg/convert-conv2d-to-img2col.mlir     | 24 +++++-----
 mlir/test/Dialect/Linalg/tile-indexed.mlir    | 10 ++---
 .../Dialect/Linalg/transform-op-split.mlir    |  4 +-
 .../TilingInterface/tile-using-interface.mlir |  6 +--
 .../TilingInterface/tile-using-scfforall.mlir |  6 +--
 15 files changed, 54 insertions(+), 139 deletions(-)

diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md
index b3e3d5a15b93ad..4ff80a90d72656 100644
--- a/mlir/docs/Dialects/Affine.md
+++ b/mlir/docs/Dialects/Affine.md
@@ -69,11 +69,8 @@ immediately enclosed by the latter),
 3. a value that dominates the `AffineScope` op enclosing the value's
 use,
 4. the result of a constant operation,
-5. the result of an op with [`AffineSymbol`](Traits.md#AffineSymbol) trait,
-6. the result of an
-[`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as
-arguments any valid symbolic identifiers, or
-7. the result of a
+5. the result of a `Pure` operation that its operands are the valid symbolic identifiers.
+6. the result of a
 [`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that
 is an argument to a `AffineScope` op or a memref where the corresponding
 dimension is either static or a dynamic one in turn bound to a valid symbol.
diff --git a/mlir/docs/Traits/_index.md b/mlir/docs/Traits/_index.md
index 4aaf50039b0682..3fa24ec77107f0 100644
--- a/mlir/docs/Traits/_index.md
+++ b/mlir/docs/Traits/_index.md
@@ -223,16 +223,6 @@ affine.load, and affine.store. The polyhedral scope defined by an operation with
 this trait includes all operations in its region excluding operations that are
 nested inside of other operations that themselves have this trait.
 
-*   `OpTrait::AffineSymbol` -- `AffineSymbol`
-
-This trait will make the results of the operation that holds it become valid symbols.
-It complements the AffineScope trait, allowing SSA values in the region of operation
-to be used as operands for various Affine dialect operations depending on the AffineScope
-trait, without altering the properties of the operation. In contrast, the AffineSymbol
-trait directly changes the properties of the operations. Additionally, the AffineScope
-trait cannot make SSA values in the nested region of operations valid symbols, operations
-can hold the AffineSymbol trait, then their results whill be vaild symbols.
-
 ### AutomaticAllocationScope
 
 *   `OpTrait::AutomaticAllocationScope` -- `AutomaticAllocationScope`
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 0c22c9c09c106b..3adfd5f4f2c436 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -223,7 +223,7 @@ def GPU_GridDimOp : GPU_IndexOp<"grid_dim"> {
     There is an implicit upper bound of `kMaxDim` (currently uint32_t::max).
   }];
 }
-def GPU_ThreadIdOp : GPU_IndexOp<"thread_id", [AffineSymbol]> {
+def GPU_ThreadIdOp : GPU_IndexOp<"thread_id"> {
   let description = [{
     Returns the thread id, i.e. the index of the current thread within the block
     along the x, y, or z `dimension`.
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 8dd0773c0bd978..51b60972203e7f 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -63,8 +63,6 @@ class PredOpTrait<string descr, Pred pred, list<Trait> traits = []>
 
 // Op defines an affine scope.
 def AffineScope : NativeOpTrait<"AffineScope">;
-// Op defines an affine symbol.
-def AffineSymbol : NativeOpTrait<"AffineSymbol">;
 // Op defines an automatic allocation scope.
 def AutomaticAllocationScope :
   NativeOpTrait<"AutomaticAllocationScope">;
diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index 0d57d4806fcde5..59f094d6690991 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -365,7 +365,6 @@ LogicalResult verifyResultSizeAttr(Operation *op, StringRef sizeAttrName);
 LogicalResult verifyNoRegionArguments(Operation *op);
 LogicalResult verifyElementwise(Operation *op);
 LogicalResult verifyIsIsolatedFromAbove(Operation *op);
-LogicalResult verifyIndexResultType(Operation *op);
 } // namespace impl
 
 /// Helper class for implementing traits.  Clients are not expected to interact
@@ -1269,18 +1268,6 @@ class AffineScope : public TraitBase<ConcreteType, AffineScope> {
   }
 };
 
-/// A trait of operation. The results of an operation with the AffineSymbol
-/// trait can be used as symbols for the purposes for affine dialect purposes.
-/// This trait will make the results of the operation that holds it become valid
-/// symbols. For more details, see `Traits.md#AffineSymbol`.
-template <typename ConcreteType>
-class AffineSymbol : public TraitBase<ConcreteType, AffineSymbol> {
-public:
-  static LogicalResult verifyTrait(Operation *op) {
-    return impl::verifyIndexResultType(op);
-  }
-};
-
 /// A trait of region holding operations that define a new scope for automatic
 /// allocations, i.e., allocations that are freed when control is transferred
 /// back from the operation's region. Any operations performing such allocations
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 0cc6244c46dd9d..6784a27fc6c87e 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -410,8 +410,8 @@ bool mlir::affine::isValidSymbol(Value value) {
 /// A value can be used as a symbol for `region` iff it meets one of the
 /// following conditions:
 /// *) It is a constant.
-/// *) It is a result, its defineOp is holded by `AffineSymbol` Trait.
-/// *) It is the result of an affine apply operation with symbol arguments.
+/// *) It is a result of a `Pure` operation that its operands are the valid
+/// *) symbolic identifiers.
 /// *) It is a result of the dim op on a memref whose corresponding size is
 ///    a valid symbol.
 /// *) It is defined at the top level of 'region' or is its argument.
@@ -444,13 +444,15 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
   if (matchPattern(defOp, m_Constant(&operandCst)))
     return true;
 
-  // AffineSymbol Op.
-  if (defOp->hasTrait<OpTrait::AffineSymbol>())
-    return true;
-
-  // Affine apply operation is ok if all of its operands are ok.
-  if (auto applyOp = dyn_cast<AffineApplyOp>(defOp))
-    return applyOp.isValidSymbol(region);
+  // `Pure` operation that its operands are the valid symbolic identifiers.
+  if (isPure(defOp)) {
+    bool operandVaildSymbol =
+        llvm::all_of(defOp->getOperands(), [&](Value operand) {
+          return affine::isValidSymbol(operand, region);
+        });
+    if (operandVaildSymbol)
+      return true;
+  }
 
   // Dim op results could be valid symbols at any level.
   if (auto dimOp = dyn_cast<ShapedDimOpInterface>(defOp))
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index 78b902035bfb42..fe0fee0f8db2ce 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -1390,21 +1390,6 @@ LogicalResult OpTrait::impl::verifyIsIsolatedFromAbove(Operation *isolatedOp) {
   return success();
 }
 
-LogicalResult OpTrait::impl::verifyIndexResultType(Operation *op) {
-  ResultRange results = op->getResults();
-  if (results.empty()) {
-    op->emitError("operation must have at least one result");
-    return failure();
-  }
-  for (OpResult result : results) {
-    if (!mlir::isa<IndexType>(result.getType())) {
-      op->emitError("operation's result type should be index");
-      return failure();
-    }
-  }
-  return success();
-}
-
 bool OpTrait::hasElementwiseMappableTraits(Operation *op) {
   return op->hasTrait<Elementwise>() && op->hasTrait<Scalarizable>() &&
          op->hasTrait<Vectorizable>() && op->hasTrait<Tensorizable>();
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
index 29c42fcd50bd74..b616632a6fe240 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
@@ -638,13 +638,13 @@ func.func @vecdim_reduction_complex_ub(%in: memref<256x512xf32>, %out: memref<25
  return
 }
 
-// CHECK:       #[[$map3:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]], [[d1]] * 2)>
-// CHECK:       #[[$map3_sub:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]] - [[d1]])>
+// CHECK:       #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1 * 2)>
+// CHECK:       #[[$map3_sub:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
 // CHECK-LABEL: @vecdim_reduction_complex_ub
 // CHECK:         %[[vzero:.*]] = arith.constant dense<0.000000e+00> : vector<128xf32>
 // CHECK:         %{{.*}} = affine.for %[[iv:.*]] = 0 to min #[[$map3]](%[[M:.*]], %[[N:.*]]) step 128 iter_args(%[[red_iter:.*]] = {{.*}}) -> (vector<128xf32>) {
 // CHECK:           %[[ub:.*]] = affine.min #[[$map3]](%[[M]], %[[N]])
-// CHECK:           %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[ub]], %[[iv]])
+// CHECK:           %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[iv]])[%[[ub]]]
 // CHECK:           %[[mask:.*]] = vector.create_mask %[[elems_left]] : vector<128xi1>
 // CHECK:           %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
 // CHECK:           %[[select:.*]] = arith.select %[[mask]], %[[ld]], %[[vzero]] : vector<128xi1>, vector<128xf32>
diff --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir
index 1539b4f4848276..44e484b9ba5982 100644
--- a/mlir/test/Dialect/Affine/invalid.mlir
+++ b/mlir/test/Dialect/Affine/invalid.mlir
@@ -20,36 +20,6 @@ func.func @affine_apply_resul_non_index(%arg0 : index) {
   return
 }
 
-// -----
-
-#map = affine_map<(d0)[s0] -> (d0 + s0)>
-
-func.func @affine_for_lower_bound_invalid_dim(%arg : index) {
-  affine.for %n0 = 0 to 7 {
-    %dim = arith.addi %arg, %arg : index
-
-    // expected-error at +1 {{operand cannot be used as a dimension id}}
-    affine.for %n1 = 0 to #map(%dim)[%arg] {
-    }
-  }
-  return
-}
-
-// -----
-
-#map = affine_map<(d0)[s0] -> (d0 + s0)>
-
-func.func @affine_for_upper_bound_invalid_dim(%arg : index) {
-  affine.for %n0 = 0 to 7 {
-    %dim = arith.addi %arg, %arg : index
-
-    // expected-error at +1 {{operand cannot be used as a dimension id}}
-    affine.for %n1 = #map(%dim)[%arg] to 7 {
-    }
-  }
-  return
-}
-
 // -----
 func.func @affine_load_invalid_dim(%M : memref<10xi32>) {
   "unknown"() ({
@@ -93,20 +63,6 @@ func.func @affine_for_upper_bound_invalid_sym() {
 
 #set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>
 
-func.func @affine_if_invalid_dim(%arg : index) {
-  affine.for %n0 = 0 to 7 {
-    %dim = arith.addi %arg, %arg : index
-
-    // expected-error at +1 {{operand cannot be used as a dimension id}}
-    affine.if #set0(%dim)[%n0] {}
-  }
-  return
-}
-
-// -----
-
-#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>
-
 func.func @affine_if_invalid_sym() {
   affine.for %i0 = 0 to 7 {
     // expected-error at +1 {{operand cannot be used as a symbol}}
diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir
index 6018eb40bac2a8..0a5c85336831ab 100644
--- a/mlir/test/Dialect/GPU/transform-gpu.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu.mlir
@@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)>
-// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)>
-// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)>
+// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)> 
+// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)>
+// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)>
 
 // CHECK-LABEL: func.func @block_linear_existing_launch(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -566,9 +566,9 @@ func.func @block_linear_existing_launch(
 //  CHECK-DAG: %[[BIDX:.*]] = gpu.block_id  x
 //  CHECK-DAG: %[[BIDY:.*]] = gpu.block_id  y
 //  CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id  z
-//  CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
-//  CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
-//  CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
+//  CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
+//  CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
+//  CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
 //  CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index
 //     CHECK: scf.if %[[CMPLIN]]
 //      CHECK:   memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
@@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} {
 !type = memref<2 x 32 x f32>
 !type1d = memref<32 x f32>
 
-// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)>
-// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)>
+// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)>
+// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)>
 
 // CHECK-LABEL: func.func @block_linear_generate_launch(
 // CHECK-SAME:    %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -620,8 +620,8 @@ func.func @block_linear_generate_launch(
 //  CHECK-DAG: %[[BIDX:.*]] = gpu.block_id  x
 //  CHECK-DAG: %[[BIDY:.*]] = gpu.block_id  y
 //  CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id  z
-//  CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]])
-//  CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
+//  CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]]
+//  CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
 //      CHECK:   memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
 //      CHECK:   memref.load %[[ARGY]][%[[BLX]], %[[BLY]]]
   scf.forall (%i, %j) in (%c7, %c9) {
@@ -647,7 +647,7 @@ module attributes {transform.with_named_sequence} {
 #map = affine_map<(d0) -> (d0 *  128)>
 #map1 = affine_map<(d0) -> (d0 * 32)>
 
-// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
+// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)> 
 // CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
 
 // CHECK-LABEL: func.func @simple_fill(
@@ -660,7 +660,7 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
 //       CHECK:   gpu.launch
   scf.forall (%arg1) in (1) {
 //       CHECK:     %[[BIDX:.*]] = gpu.block_id  x
-//       CHECK:     %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]])
+//       CHECK:     %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]]
     %0 = affine.apply #map(%arg1)
     %subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, strided<[1], offset: ?>>
     scf.forall (%arg2) in (4) {
diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
index c7c846d7ecc9c5..c17f20b2d03aba 100644
--- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
+++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
@@ -40,9 +40,9 @@ module attributes {transform.with_named_sequence} {
 // CHECK: %[[KINDEX:.+]] = linalg.index 2 : index
 
 // Compute input channel/convolved indices.
-// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<(d0) -> (d0 mod 4)>(%[[KINDEX]])
-// CHECK: %[[CONVH:.+]] = affine.apply affine_map<(d0, d1) -> (d0 floordiv 14 + d1 floordiv 12)>(%[[MINDEX]], %[[KINDEX]])
-// CHECK: %[[CONVW:.+]] = affine.apply affine_map<(d0, d1) -> (d0 mod 14 + (d1 mod 12) floordiv 4)>(%[[MINDEX]], %[[KINDEX]])
+// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 mod 4)>()[%[[KINDEX]]]
+// CHECK: %[[CONVH:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 floordiv 14 + s1 floordiv 12)>()[%[[MINDEX]], %[[KINDEX]]]
+// CHECK: %[[CONVW:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 mod 14 + (s1 mod 12) floordiv 4)>()[%[[MINDEX]], %[[KINDEX]]]
 
 // Extract from the input tensor.
 // CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract
@@ -227,9 +227,9 @@ module attributes {transform.with_named_sequence} {
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 
 //  Im2col maps
-//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 floordiv 9)>
-//  CHECK-DAG: #[[MAP7:.+]] = affine_map<(d0, d1) -> (d0 floordiv 14 + (d1 mod 9) floordiv 3)>
-//  CHECK-DAG: #[[MAP8:.+]] = affine_map<(d0, d1) -> (d0 + d1 - (d0 floordiv 14) * 14 - (d1 floordiv 3) * 3)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 floordiv 9)>
+//  CHECK-DAG: #[[MAP7:.+]] = affine_map<()[s0, s1] -> (s0 floordiv 14 + (s1 mod 9) floordiv 3)>
+//  CHECK-DAG: #[[MAP8:.+]] = affine_map<()[s0, s1] -> (s0 + s1 - (s0 floordiv 14) * 14 - (s1 floordiv 3) * 3)>
 
 
 //  CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
@@ -251,9 +251,9 @@ module attributes {transform.with_named_sequence} {
 //      CHECK:       %[[NINDEX:.+]] = linalg.index 2 : index
 
 //      Compute input channel/convolved indices.
-//      CHECK:       %[[ICINDEX:.+]] = affine.apply #[[MAP1]](%[[KINDEX]])
-//      CHECK:       %[[CONVH:.+]] = affine.apply #[[MAP7]](%[[NINDEX]], %[[KINDEX]])
-//      CHECK:       %[[CONVW:.+]] = affine.apply #[[MAP8]](%[[NINDEX]], %[[KINDEX]])
+//      CHECK:       %[[ICINDEX:.+]] = affine.apply #[[MAP1]]()[%[[KINDEX]]]
+//      CHECK:       %[[CONVH:.+]] = affine.apply #[[MAP7]]()[%[[NINDEX]], %[[KINDEX]]]
+//      CHECK:       %[[CONVW:.+]] = affine.apply #[[MAP8]]()[%[[NINDEX]], %[[KINDEX]]]
 
 //      Extract from the input tensor.
 //      CHECK:       %[[EXTRACTED_INPUT:.+]] = tensor.extract
@@ -300,9 +300,9 @@ module attributes {transform.with_named_sequence} {
 // CHECK: %[[KINDEX:.+]] = linalg.index 2 : index
 
 // Compute input channel/convolved indices.
-// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<(d0) -> (d0 mod 4)>(%[[KINDEX]])
-// CHECK: %[[CONVH:.+]] = affine.apply affine_map<(d0, d1) -> (d0 floordiv 14 + d1 floordiv 12)>(%[[MINDEX]], %[[KINDEX]])
-// CHECK: %[[CONVW:.+]] = affine.apply affine_map<(d0, d1) -> (d0 mod 14 + (d1 mod 12) floordiv 4)>(%[[MINDEX]], %[[KINDEX]])
+// CHECK: %[[ICINDEX:.+]] = affine.apply affine_map<()[s0] -> (s0 mod 4)>()[%[[KINDEX]]]
+// CHECK: %[[CONVH:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 floordiv 14 + s1 floordiv 12)>()[%[[MINDEX]], %[[KINDEX]]]
+// CHECK: %[[CONVW:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 mod 14 + (s1 mod 12) floordiv 4)>()[%[[MINDEX]], %[[KINDEX]]]
 
 // Extract from the input tensor.
 // CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
index b4aa0a33bc5926..d96a251b01ccb3 100644
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -19,13 +19,13 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
-// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
+// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0)[s0] -> (d0 + s0)> 
 // TILE-10n25-LABEL: func @indexed_vector
 // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index
 // TILE-10n25: scf.for %[[J:.*]] = {{.*}} step %[[C10]]
 // TILE-10n25:   linalg.generic
 // TILE-10n25:     %[[I:.*]] = linalg.index 0 : index
-// TILE-10n25:     %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]])
+// TILE-10n25:     %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[J]])[%[[I]]]
 // TILE-10n25:     linalg.yield %[[NEW_I]] : index
 
 // -----
@@ -51,7 +51,7 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
-// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
+// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0)[s0] -> (d0 + s0)>
 // TILE-10n25-LABEL: func @indexed_matrix
 // TILE-10n25-DAG: %[[C25:.*]] = arith.constant 25 : index
 // TILE-10n25-DAG: %[[C10:.*]] = arith.constant 10 : index
@@ -59,8 +59,8 @@ module attributes {transform.with_named_sequence} {
 // TILE-10n25:   scf.for %[[L:.*]] = {{.*}} step %[[C25]]
 // TILE-10n25:     linalg.generic
 // TILE-10n25:       %[[I:.*]] = linalg.index 0 : index
-// TILE-10n25:       %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[K]])
+// TILE-10n25:       %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[K]])[%[[I]]]
 // TILE-10n25:       %[[J:.*]] = linalg.index 1 : index
-// TILE-10n25:       %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]])
+// TILE-10n25:       %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[L]])[%[[J]]]
 // TILE-10n25:       %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index
 // TILE-10n25:       linalg.yield %[[SUM]] : index
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
index 68c849385ba6b5..7f0ef401c8422b 100644
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -10,7 +10,7 @@ module attributes {transform.with_named_sequence} {
 
 func.func private @elem(%arg0: f32, %arg1: index, %arg2: index) -> f32
 
-// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<(d0) -> (d0 + 42)>
+// CHECK: #[[$ADD_42_MAP:.+]] = affine_map<()[s0] -> (s0 + 42)>
 
 // CHECK-LABEL: @one_d_static
 // CHECK-SAME:  %[[IN:.+]]: tensor<100xf32>, %[[OUT:.+]]: tensor<100xf32>
@@ -30,7 +30,7 @@ func.func @one_d_static(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tenso
   // CHECK:   ins(%[[IN_SLICE_HIGH]]
   // CHECK:   outs(%[[OUT_SLICE_HIGH]]
   // CHECK:   %[[IDX:.+]] = linalg.index 0
-  // CHECK:   affine.apply #[[$ADD_42_MAP]](%[[IDX]])
+  // CHECK:   affine.apply #[[$ADD_42_MAP]]()[%[[IDX]]]
   // CHECK:   func.call @elem
   // CHECK: %[[RES:.+]] = tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[RES_PARTIAL]][42] [58] [1]
   %0 = linalg.generic {
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
index 8eb1311170c668..2d9d7e432d8753 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -259,14 +259,14 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
-//       CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0, d1) -> (d0 + d1)>
+//       CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
 // CHECK-LABEL: @indexed_semantics
 //       CHECK:   scf.for %[[I0:.+]] = %{{.*}} to %{{.*}} step %{{.*}}
 //       CHECK:     scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}}
 //       CHECK:       %[[INDEX0:.+]] = linalg.index 0
-//       CHECK:       %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
+//       CHECK:       %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I0]])[%[[INDEX0]]]
 //       CHECK:       %[[INDEX1:.+]] = linalg.index 1
-//       CHECK:       %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX1]], %[[I1]])
+//       CHECK:       %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I1]])[%[[INDEX1]]]
 //       CHECK:       arith.addi %[[INDEX0_AMENDED]], %[[INDEX1_AMENDED]]
 
 // -----
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir
index 53dd0c6a2425ce..745a82fc0da751 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-scfforall.mlir
@@ -205,7 +205,7 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
 
 func.func @indexed_semantics(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
   // Check that we correctly amend "linalg.index" results.
@@ -241,9 +241,9 @@ module attributes {transform.with_named_sequence} {
 // CHECK-LABEL: @indexed_semantics
 //       CHECK: scf.forall (%[[I0:.+]], %[[I1:.+]]) =
 //       CHECK:   %[[INDEX0:.+]] = linalg.index 0
-//       CHECK:   %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
+//       CHECK:   %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I0]])[%[[INDEX0]]]
 //       CHECK:   %[[INDEX1:.+]] = linalg.index 1
-//       CHECK:   %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX1]], %[[I1]])
+//       CHECK:   %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[I1]])[%[[INDEX1]]]
 //       CHECK:   arith.addi %[[INDEX0_AMENDED]], %[[INDEX1_AMENDED]]
 
 // -----

>From b81ed076bdf7c0c617d0ef06031442711158c669 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Fri, 17 Jan 2025 11:02:40 +0800
Subject: [PATCH 4/4] fix tests and opt the code.

---
 mlir/docs/Dialects/Affine.md             |  2 +-
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 17 ++++----
 mlir/test/Dialect/Affine/ops.mlir        | 52 +++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/mlir/docs/Dialects/Affine.md b/mlir/docs/Dialects/Affine.md
index 4ff80a90d72656..f4fa07aca58831 100644
--- a/mlir/docs/Dialects/Affine.md
+++ b/mlir/docs/Dialects/Affine.md
@@ -69,7 +69,7 @@ immediately enclosed by the latter),
 3. a value that dominates the `AffineScope` op enclosing the value's
 use,
 4. the result of a constant operation,
-5. the result of a `Pure` operation that its operands are the valid symbolic identifiers.
+5. the result of a `Pure` operation that whose operands are the valid symbolic identifiers.
 6. the result of a
 [`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that
 is an argument to a `AffineScope` op or a memref where the corresponding
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 6784a27fc6c87e..6e53b38b329048 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -410,8 +410,8 @@ bool mlir::affine::isValidSymbol(Value value) {
 /// A value can be used as a symbol for `region` iff it meets one of the
 /// following conditions:
 /// *) It is a constant.
-/// *) It is a result of a `Pure` operation that its operands are the valid
-/// *) symbolic identifiers.
+/// *) It is a result of a `Pure` operation whose operands are valid symbolic
+/// *) identifiers.
 /// *) It is a result of the dim op on a memref whose corresponding size is
 ///    a valid symbol.
 /// *) It is defined at the top level of 'region' or is its argument.
@@ -444,14 +444,11 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
   if (matchPattern(defOp, m_Constant(&operandCst)))
     return true;
 
-  // `Pure` operation that its operands are the valid symbolic identifiers.
-  if (isPure(defOp)) {
-    bool operandVaildSymbol =
-        llvm::all_of(defOp->getOperands(), [&](Value operand) {
-          return affine::isValidSymbol(operand, region);
-        });
-    if (operandVaildSymbol)
-      return true;
+  // `Pure` operation that whose operands are valid symbolic identifiers.
+  if (isPure(defOp) && llvm::all_of(defOp->getOperands(), [&](Value operand) {
+        return affine::isValidSymbol(operand, region);
+      })) {
+    return true;
   }
 
   // Dim op results could be valid symbols at any level.
diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir
index 95d81f8519d480..e3721806989bb9 100644
--- a/mlir/test/Dialect/Affine/ops.mlir
+++ b/mlir/test/Dialect/Affine/ops.mlir
@@ -331,7 +331,7 @@ module attributes {gpu.container_module} {
 
 // CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>
 
-// CHECK-LABEL: @affine_thread_id
+// CHECK-LABEL: gpu.func @affine_thread_id
 
 module {
   gpu.module @gpu {
@@ -360,4 +360,52 @@ module {
 // CHECK: %[[VAL_5:.*]] = gpu.thread_id  x
 // CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
 // CHECK: %[[VAL_7:.*]] = arith.constant 128 : index
-// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {
+// CHECK: affine.for %{{.*}} = %[[VAL_6]] to %[[VAL_7]] step 8 {
+
+// -----
+
+#map = affine_map<(d0)[s0] -> (d0 + s0)>
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
+
+// CHECK-LABEL: func @arith_add_vaild_symbol_upper_bound
+
+func.func @arith_add_vaild_symbol_upper_bound(%arg : index) {
+  affine.for %n0 = 0 to 7 {
+    %dim = arith.addi %arg, %arg : index
+    affine.for %n1 = 0 to #map(%dim)[%arg] {
+    }
+  }
+  return
+}
+
+// CHECK-SAME: %[[VAL_0:.*]]: index) {
+// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 {
+// CHECK:   %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index
+// CHECK:   affine.for %[[VAL_3:.*]] = 0 to #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] {
+// CHECK:   }
+// CHECK: }
+
+// -----
+
+#map = affine_map<(d0)[s0] -> (d0 + s0)>
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
+
+// CHECK-LABEL: func @arith_add_vaild_symbol_lower_bound
+
+func.func @arith_add_vaild_symbol_lower_bound(%arg : index) {
+  affine.for %n0 = 0 to 7 {
+    %dim = arith.addi %arg, %arg : index
+    affine.for %n1 = #map(%dim)[%arg] to 7 {
+    }
+  }
+  return
+}
+
+// CHECK-SAME: %[[VAL_0:.*]]: index) {
+// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 {
+// CHECK:   %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index
+// CHECK:   affine.for %[[VAL_3:.*]] = #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] to 7 {
+// CHECK:   }
+// CHECK: }