[Mlir-commits] [mlir] b257dba - [mlir][linalg] Create AffineMinOp map in canonical form.
llvmlistbot at llvm.org
Thu Mar 24 00:02:07 PDT 2022
Author: gysit
Date: 2022-03-24T06:55:59Z
New Revision: b257dba58e1ff20611421a777c11c58bc8b8b008
URL: https://github.com/llvm/llvm-project/commit/b257dba58e1ff20611421a777c11c58bc8b8b008
DIFF: https://github.com/llvm/llvm-project/commit/b257dba58e1ff20611421a777c11c58bc8b8b008.diff
LOG: [mlir][linalg] Create AffineMinOp map in canonical form.
Create the AffineMinOp used to compute the padding width in canonical form and update the tests.
Reviewed By: springerm
Differential Revision: https://reviews.llvm.org/D122311
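As an illustration (not part of this patch), a minimal MLIR sketch of what the reordering means, using the tile size 5 and dimension size 24 that appear in the tests below: the canonical form lists the dimension-dependent bound before the constant tile size, which is the order the updated FileCheck patterns expect.

// Previously the map results were emitted as (5, -d0 + 24); the canonical
// order puts the dimension-dependent bound first.
#tile = affine_map<(d0) -> (-d0 + 24, 5)>

func.func @tile_bound(%iv: index) -> index {
  // Clamp the tile at the boundary: min(24 - %iv, 5) avoids out-of-bounds accesses.
  %sz = affine.min #tile(%iv)
  func.return %sz : index
}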
Added:
Modified:
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 431c1eb0be51e..5a566c5c30cab 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -821,9 +821,9 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
- // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
+ // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
AffineMap minMap = AffineMap::inferFromExprList(
- {ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
+ {ArrayRef<AffineExpr>{dim1 - dim2, dim0}})
.front();
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
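For reference, a standalone sketch (a hypothetical example, not part of this patch; it assumes compilation against the MLIR IR libraries) of how the min map is now built with the dimension-dependent expression first. The dim0/dim1/dim2 bindings mirror the {size, d, offset} operand order used above.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"

int main() {
  mlir::MLIRContext ctx;
  // d0 = size, d1 = dim, d2 = offset, matching the operands {size, d, offset}.
  mlir::AffineExpr dim0 = mlir::getAffineDimExpr(0, &ctx);
  mlir::AffineExpr dim1 = mlir::getAffineDimExpr(1, &ctx);
  mlir::AffineExpr dim2 = mlir::getAffineDimExpr(2, &ctx);
  // Canonical result order: the bound (dim - offset) precedes the size.
  mlir::AffineMap minMap =
      mlir::AffineMap::inferFromExprList(
          {llvm::ArrayRef<mlir::AffineExpr>{dim1 - dim2, dim0}})
          .front();
  // Prints: (d0, d1, d2) -> (d1 - d2, d0)
  llvm::errs() << minMap << "\n";
  return 0;
}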
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
index 0126ad1a026e2..ab709c69651aa 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
@@ -1,16 +1,16 @@
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul fuse tile-sizes=5,4,7 tile-interchange=1,0,2 run-enable-pass=false" -cse -split-input-file | FileCheck --check-prefix=MATMUL %s
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.generic fuse tile-sizes=5,4,7 tile-interchange=1,0,2 run-enable-pass=false" -cse -split-input-file | FileCheck --check-prefix=GENERIC %s
-// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (5, -d0 + 24)>
-// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (7, -d0 + 12)>
-// MATMUL-DAG: #[[MAP2:.*]] = affine_map<(d0, d1) -> (d0, -d1 + 24)>
-// MATMUL-DAG: #[[MAP3:.*]] = affine_map<(d0, d1) -> (d0, -d1 + 12)>
+// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (-d0 + 24, 5)>
+// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (-d0 + 12, 7)>
+// MATMUL-DAG: #[[MAP2:.*]] = affine_map<(d0, d1) -> (-d1 + 24, d0)>
+// MATMUL-DAG: #[[MAP3:.*]] = affine_map<(d0, d1) -> (-d1 + 12, d0)>
// MATMUL: fuse_input
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
func.func @fuse_input(%arg0: tensor<24x12xf32>,
- %arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c25 = arith.constant 25 : index
@@ -34,19 +34,19 @@ func.func @fuse_input(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T1]]
%1 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
- return %1 : tensor<24x25xf32>
+ func.return %1 : tensor<24x25xf32>
}
// -----
-// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (5, -d0 + 24)>
-// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (4, -d0 + 25)>
+// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (-d0 + 24, 5)>
+// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (-d0 + 25, 4)>
// MATMUL: fuse_output
// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
func.func @fuse_output(%arg0: tensor<24x12xf32>,
- %arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
// MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
// MATMUL-DAG: %[[C1:.*]] = arith.constant 1 : index
%c0 = arith.constant 0 : index
@@ -81,15 +81,15 @@ func.func @fuse_output(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: 0, 0
// MATMUL-SAME: %[[D0]], %[[D1]]
%1 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%0 : tensor<24x25xf32>) -> tensor<24x25xf32>
- return %1 : tensor<24x25xf32>
+ func.return %1 : tensor<24x25xf32>
}
// -----
-// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (4, -d0 + 25)>
-// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (7, -d0 + 12)>
-// MATMUL-DAG: #[[MAP2:.*]] = affine_map<(d0, d1) -> (d0, -d1 + 25)>
-// MATMUL-DAG: #[[MAP3:.*]] = affine_map<(d0, d1) -> (d0, -d1 + 12)>
+// MATMUL-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (-d0 + 25, 4)>
+// MATMUL-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (-d0 + 12, 7)>
+// MATMUL-DAG: #[[MAP2:.*]] = affine_map<(d0, d1) -> (-d1 + 25, d0)>
+// MATMUL-DAG: #[[MAP3:.*]] = affine_map<(d0, d1) -> (-d1 + 12, d0)>
#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
@@ -97,16 +97,16 @@ func.func @fuse_output(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>
// MATMUL-SAME: %[[ARG3:[0-9a-zA-Z]*]]: tensor<12x7x25xf32>
func.func @fuse_reduction(%arg0: tensor<24x12xf32>,
- %arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>,
- %arg3: tensor<12x7x25xf32>) -> tensor<24x25xf32> {
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>,
+ %arg3: tensor<12x7x25xf32>) -> tensor<24x25xf32> {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c25 = arith.constant 25 : index
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg3 : tensor<12x7x25xf32>) outs(%arg1 : tensor<12x25xf32>) {
- ^bb0(%arg4: f32, %arg5: f32):
+ ^bb0(%arg4: f32, %arg5: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2 : f32
} -> tensor<12x25xf32>
@@ -129,7 +129,7 @@ func.func @fuse_reduction(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T2:.*]] = linalg.generic {{.*}} ins(%[[T0]] {{.*}} outs(%[[T1]]
// MATMUL: %{{.*}} = linalg.matmul ins(%{{.*}}, %[[T2]]
%1 = linalg.matmul ins(%arg0, %0 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
- return %1 : tensor<24x25xf32>
+ func.return %1 : tensor<24x25xf32>
}
// -----
@@ -141,16 +141,16 @@ func.func @fuse_reduction(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
// MATMUL-SAME: %[[ARG3:[0-9a-zA-Z]*]]: tensor<12x24xf32>
func.func @fuse_transposed(%arg0: tensor<24x12xf32>,
- %arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>,
- %arg3: tensor<12x24xf32>) -> tensor<24x25xf32> {
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>,
+ %arg3: tensor<12x24xf32>) -> tensor<24x25xf32> {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c25 = arith.constant 25 : index
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg3 : tensor<12x24xf32>) outs(%arg0 : tensor<24x12xf32>) {
- ^bb0(%arg4: f32, %arg5: f32):
+ ^bb0(%arg4: f32, %arg5: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2 : f32
} -> tensor<24x12xf32>
@@ -167,7 +167,7 @@ func.func @fuse_transposed(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T2:.*]] = linalg.generic {{.*}} ins(%[[T0]] {{.*}} outs(%[[T1]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T2]]
%1 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
- return %1 : tensor<24x25xf32>
+ func.return %1 : tensor<24x25xf32>
}
// -----
@@ -176,8 +176,8 @@ func.func @fuse_transposed(%arg0: tensor<24x12xf32>,
// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
func.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
- %arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c25 = arith.constant 25 : index
@@ -200,7 +200,7 @@ func.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG5]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T3]], {{.*}} outs(%[[T4]]
%2 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%1 : tensor<24x25xf32>) -> tensor<24x25xf32>
- return %2 : tensor<24x25xf32>
+ func.return %2 : tensor<24x25xf32>
}
// -----
@@ -211,15 +211,15 @@ func.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
// MATMUL: fuse_indexed
// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xi32>
func.func @fuse_indexed(%arg0: tensor<24x12xi32>,
- %arg1: tensor<12x25xi32>,
- %arg2: tensor<24x25xi32>) -> tensor<24x25xi32> {
+ %arg1: tensor<12x25xi32>,
+ %arg2: tensor<24x25xi32>) -> tensor<24x25xi32> {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c25 = arith.constant 25 : index
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%0 = linalg.generic {indexing_maps = [#map0], iterator_types = ["parallel", "parallel"]} outs(%arg1 : tensor<12x25xi32>) {
- ^bb0(%arg3: i32):
+ ^bb0(%arg3: i32):
%6 = linalg.index 0 : index
%7 = linalg.index 1 : index
%8 = arith.addi %6, %7 : index
@@ -241,7 +241,7 @@ func.func @fuse_indexed(%arg0: tensor<24x12xi32>,
// MATMUL: %[[IDX1_SHIFTED:.*]] = affine.apply #[[MAP0]](%[[IDX1]], %[[IV2]])
// MATMUL: %{{.*}} = arith.addi %[[IDX0_SHIFTED]], %[[IDX1_SHIFTED]]
%1 = linalg.matmul ins(%arg0, %0 : tensor<24x12xi32>, tensor<12x25xi32>) outs(%arg2 : tensor<24x25xi32>) -> tensor<24x25xi32>
- return %1 : tensor<24x25xi32>
+ func.return %1 : tensor<24x25xi32>
}
// -----
@@ -252,8 +252,8 @@ func.func @fuse_indexed(%arg0: tensor<24x12xi32>,
// GENERIC: fuse_outermost_reduction
// GENERIC-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<10x17xf32>
// GENERIC-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<10xf32>
-func @fuse_outermost_reduction(%arg0: tensor<10x17xf32>,
- %arg1: tensor<10xf32>) -> tensor<10xf32> {
+func.func @fuse_outermost_reduction(%arg0: tensor<10x17xf32>,
+ %arg1: tensor<10xf32>) -> tensor<10xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<10x17xf32>) -> tensor<10x17xf32>
@@ -272,25 +272,25 @@ func @fuse_outermost_reduction(%arg0: tensor<10x17xf32>,
// GENERIC-SAME: %[[IV1]]
// GENERIC: linalg.generic {{.*}} ins(%[[T2]] {{.*}} outs(%[[T3]]
%2 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction"]} ins(%0 : tensor<10x17xf32>) outs(%1 : tensor<10xf32>) {
- ^bb0(%arg2: f32, %arg3: f32):
+ ^bb0(%arg2: f32, %arg3: f32):
%3 = arith.addf %arg2, %arg3 : f32
linalg.yield %3 : f32
} -> tensor<10xf32>
- return %2 : tensor<10xf32>
+ func.return %2 : tensor<10xf32>
}
// -----
// GENERIC-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// GENERIC-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>
-// GENERIC-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 17)>
+// GENERIC-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (-d0 - d1 + 17, 8)>
+// GENERIC-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (-d1 - d2 + 17, d0)>
#map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
// GENERIC: fuse_non_rectangular
// GENERIC-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<10x17xf32>
-func @fuse_non_rectangular(%arg0: tensor<10x17xf32>,
- %arg1: tensor<10x8xf32>) -> tensor<10x8xf32> {
+func.func @fuse_non_rectangular(%arg0: tensor<10x17xf32>,
+ %arg1: tensor<10x8xf32>) -> tensor<10x8xf32> {
// GENERIC-DAG: %[[C0:.*]] = arith.constant 0 : index
// GENERIC-DAG: %[[C4:.*]] = arith.constant 4 : index
@@ -315,9 +315,9 @@ func @fuse_non_rectangular(%arg0: tensor<10x17xf32>,
// GENERIC-SAME: , %[[UB1]]
// GENERIC: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
%1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<10x17xf32>) outs(%arg1 : tensor<10x8xf32>) {
- ^bb0(%arg2: f32, %arg3: f32):
+ ^bb0(%arg2: f32, %arg3: f32):
%2 = arith.addf %arg2, %arg3 : f32
linalg.yield %2 : f32
} -> tensor<10x8xf32>
- return %1 : tensor<10x8xf32>
+ func.return %1 : tensor<10x8xf32>
}