[Mlir-commits] [mlir] 6288345 - [mlir][linalg] makeTiledShape: No affine.min if tile size == 1
Matthias Springer
llvmlistbot at llvm.org
Mon Sep 13 19:10:41 PDT 2021
Author: Matthias Springer
Date: 2021-09-14T10:48:20+09:00
New Revision: 62883459cdb8c151909bc34e8f71e118f3d3a4ce
URL: https://github.com/llvm/llvm-project/commit/62883459cdb8c151909bc34e8f71e118f3d3a4ce
DIFF: https://github.com/llvm/llvm-project/commit/62883459cdb8c151909bc34e8f71e118f3d3a4ce.diff
LOG: [mlir][linalg] makeTiledShape: No affine.min if tile size == 1
This improves codegen (more static type information) with `scalarize-dynamic-dims`.
Differential Revision: https://reviews.llvm.org/D109415
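As an illustrative sketch (not text from the commit; names and the exact map
are made up, but mirror the new test below): before this change, tiling a
dynamic dimension with tile size 1 still guarded the slice size with an
affine.min, so the slice type stayed dynamic:

  #map = affine_map<(d0)[s0] -> (1, -d0 + s0)>
  %sz = affine.min #map(%iv)[%ub]
  %s = tensor.extract_slice %t[%iv, 0] [%sz, 259] [1, 1]
       : tensor<?x259xf32> to tensor<?x259xf32>

With this change, the constant size 1 is used directly and the slice gets a
static type:

  %s = tensor.extract_slice %t[%iv, 0] [1, 259] [1, 1]
       : tensor<?x259xf32> to tensor<1x259xf32>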
Added:
Modified:
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/test/Dialect/Linalg/fusion-sequence.mlir
mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 3317a38ed026e..1305cfdf464a4 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -519,6 +519,15 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
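+// Applies `expr` to `operands`, first composing the affine.apply ops that
+// define the operands into the map and canonicalizing the result. With
+// constant operands, createOrFold then folds the apply all the way to a
+// ConstantIndexOp, which makeTiledShape below relies on when it inspects
+// the tile size via size.getDefiningOp<ConstantIndexOp>().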
+static Value fullyComposeAndAffineApply(OpBuilder &b, Location loc,
+ AffineExpr expr, ValueRange operands) {
+ AffineMap map = AffineMap::inferFromExprList({expr}).front();
+ SmallVector<Value> normalizedOperands(operands.begin(), operands.end());
+ mlir::fullyComposeAffineMapAndOperands(&map, &normalizedOperands);
+ canonicalizeMapAndOperands(&map, &normalizedOperands);
+ return b.createOrFold<AffineApplyOp>(loc, map, normalizedOperands);
+}
+
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
ValueRange tileSizes, AffineMap map, ValueRange lbs,
ValueRange ubs, ValueRange subShapeSizes) {
@@ -554,16 +563,21 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
applyMapToValues(builder, loc, m, subShapeSizes).front();
// Resulting size needs to be made half open interval again.
AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
- Value size = makeComposedAffineApply(builder, loc, s0 + 1, closedIntSize);
+ Value size =
+ fullyComposeAndAffineApply(builder, loc, s0 + 1, closedIntSize);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
// The size of the subview / extract_slice should be trimmed to avoid
- // out-of-bounds accesses, unless we statically know the subshape size
- // divides the shape size evenly.
+ // out-of-bounds accesses, unless:
+ // a. We statically know the subshape size divides the shape size evenly.
+  //    b. The subshape size is 1. Given how the loops are set up, tensors
+  //       with a dimension of size 0 are never constructed, so a size-1
+  //       subshape can never run out of bounds.
int64_t shapeSize = shape[r];
auto sizeCst = size.getDefiningOp<ConstantIndexOp>();
- if (ShapedType::isDynamic(shapeSize) || !sizeCst ||
- (shapeSize % sizeCst.getValue()) != 0) {
+ auto hasTileSizeOne = sizeCst && sizeCst.getValue() == 1;
+ auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
+ ((shapeSize % sizeCst.getValue()) == 0);
+ if (!hasTileSizeOne && !dividesEvenly) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
<< ", size: " << size
<< ": make sure in bound with affine.min\n");
@@ -577,6 +591,7 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
Value d = applyMapToValues(builder, loc, m, ubs).front();
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
+ canonicalizeMapAndOperands(&minMap, &operands);
size = builder.create<AffineMinOp>(loc, builder.getIndexType(), minMap,
operands);
}
@@ -623,7 +638,7 @@ SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs,
// Before composing, we need to make range a closed interval.
Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
AffineExpr d0 = getAffineDimExpr(0, b.getContext());
- sizes.push_back(makeComposedAffineApply(b, loc, d0 - 1, size));
+ sizes.push_back(fullyComposeAndAffineApply(b, loc, d0 - 1, size));
LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
}
return sizes;
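
To see the effect end to end, consider a constant tile size of 1 (a sketch,
not part of the commit). Both call sites above fold their affine expressions
away completely, leaving a ConstantIndexOp that the sizeCst check in
makeTiledShape recognizes:

  %c1 = constant 1 : index        // tile size
  // computeTileSizes: (d0 - 1) applied to %c1 folds to
  %c0 = constant 0 : index
  // makeTiledShape: (s0 + 1) applied to %c0 folds to
  %c1_0 = constant 1 : index
  // hasTileSizeOne is true, so no affine.min is emitted.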
diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
index a18c007fe355a..de788051e301b 100644
--- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
@@ -212,7 +212,6 @@ module {
}
}
-// CHaECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (16, d0 - d1)>
// CHECK: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
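
The removed line appears to be a stale check (its misspelled CHaECK prefix is
not a FileCheck directive, so it never matched). The live CHECK that follows
expresses the same bound, min(16, ub - iv), in the canonical form that
canonicalizeMapAndOperands produces: operands that are valid affine symbols
are promoted from dimension to symbol positions. A sketch with illustrative
operand names:

  affine_map<(d0, d1) -> (16, d0 - d1)>(%ub, %iv)
  // becomes, once %ub is recognized as a valid symbol:
  affine_map<(d0)[s0] -> (16, -d0 + s0)>(%iv)[%ub]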
diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
index f749a61dc3d48..0daa4bfca5cc0 100644
--- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
+++ b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
@@ -25,3 +25,50 @@ func @matmul_partly_dynamic_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x2000x
outs(%out: tensor<?x2000xf32>) -> tensor<?x2000xf32>
return %r : tensor<?x2000xf32>
}
+
+// -----
+
+// The input IR of this test case is a tiled and peeled linalg.matmul op.
+
+// CHECK-LABEL: func @tiled_and_peeled_matmul(
+// CHECK: linalg.matmul ins({{.*}} : tensor<32x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<32x258xf32>) -> tensor<32x258xf32>
+// CHECK: linalg.matmul ins({{.*}} : tensor<1x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<1x258xf32>) -> tensor<1x258xf32>
+#map0 = affine_map<(d0) -> (64, -d0 + 257)>
+#map1 = affine_map<()[s0] -> ((s0 floordiv 32) * 32)>
+#map2 = affine_map<(d0)[s0] -> (d0 - (s0 floordiv 32) * 32)>
+
+func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258xf32>, %arg2: tensor<257x258xf32>) -> tensor<257x258xf32> {
+ %c257 = constant 257 : index
+ %c64 = constant 64 : index
+ %cst = constant 0.000000e+00 : f32
+ %c0 = constant 0 : index
+ %c32 = constant 32 : index
+ %0 = linalg.fill(%cst, %arg2) : f32, tensor<257x258xf32> -> tensor<257x258xf32>
+ %1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) {
+ %2 = affine.min #map0(%arg3)
+ %3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor<?x259xf32>
+ %4 = tensor.extract_slice %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<257x258xf32> to tensor<?x258xf32>
+ %5 = affine.apply #map1()[%2]
+ %6 = scf.for %arg5 = %c0 to %5 step %c32 iter_args(%arg6 = %4) -> (tensor<?x258xf32>) {
+ %10 = tensor.extract_slice %3[%arg5, 0] [32, 259] [1, 1] : tensor<?x259xf32> to tensor<32x259xf32>
+ %11 = tensor.extract_slice %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<?x258xf32> to tensor<32x258xf32>
+ %12 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%10, %arg1 : tensor<32x259xf32>, tensor<259x258xf32>) outs(%11 : tensor<32x258xf32>) -> tensor<32x258xf32>
+ %13 = tensor.insert_slice %12 into %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<32x258xf32> into tensor<?x258xf32>
+ scf.yield %13 : tensor<?x258xf32>
+ }
+ %7 = cmpi slt, %5, %2 : index
+ %8 = scf.if %7 -> (tensor<?x258xf32>) {
+ %10 = affine.apply #map2(%2)[%2]
+ %11 = tensor.extract_slice %3[%5, 0] [%10, 259] [1, 1] : tensor<?x259xf32> to tensor<?x259xf32>
+ %12 = tensor.extract_slice %6[%5, 0] [%10, 258] [1, 1] : tensor<?x258xf32> to tensor<?x258xf32>
+ %13 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%11, %arg1 : tensor<?x259xf32>, tensor<259x258xf32>) outs(%12 : tensor<?x258xf32>) -> tensor<?x258xf32>
+ %14 = tensor.insert_slice %13 into %6[%5, 0] [%10, 258] [1, 1] : tensor<?x258xf32> into tensor<?x258xf32>
+ scf.yield %14 : tensor<?x258xf32>
+ } else {
+ scf.yield %6 : tensor<?x258xf32>
+ }
+ %9 = tensor.insert_slice %8 into %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<?x258xf32> into tensor<257x258xf32>
+ scf.yield %9 : tensor<257x258xf32>
+ }
+ return %1 : tensor<257x258xf32>
+}
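
Tracing the last outer iteration shows where the static 1x259 shape in the
CHECK lines comes from (a worked sketch, not part of the test). At
%arg3 = 256:

  %2 = affine.min #map0(%arg3)        // min(64, 257 - 256) = 1
  %5 = affine.apply #map1()[%2]       // (1 floordiv 32) * 32 = 0
  %10 = affine.apply #map2(%2)[%2]    // 1 - 0 = 1

The inner scf.for over [0, %5) does not execute, so only the peeled scf.if
branch runs a matmul, on a slice whose dynamic size is in fact 1.
scalarize-dynamic-dims tiles that dimension with tile size 1, and with this
patch the resulting extract_slice is typed tensor<1x259xf32> rather than a
dynamic tensor guarded by an affine.min.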
diff --git a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir b/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir
index 2a8a2cc6fb885..feb6309ba8102 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns-matmul-to-vector.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-1d | FileCheck %s
-// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-2d | FileCheck %s
+// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-1d | FileCheck %s -check-prefix=CHECK-1D
+// RUN: mlir-opt %s -test-linalg-transform-patterns=test-matmul-to-vector-patterns-tile-2d | FileCheck %s -check-prefix=CHECK-2D
func @matmul(%A: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>,
%B: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>,
@@ -11,17 +11,36 @@ func @matmul(%A: memref<1584x1584xf32, offset: 0, strides: [1584, 1]>,
return
}
-// CHECK-LABEL:func @matmul
-// CHECK: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32>
-// CHECK: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32>
-// CHECK: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32>
+// CHECK-1D-LABEL:func @matmul
+// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32>
//
-// CHECK: linalg.copy
-// CHECK: linalg.copy
-// CHECK: linalg.copy
+// CHECK-1D: vector.transfer_read {{.*}} : memref<8x16xf32, #{{.*}}>, vector<8x16xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32, #{{.*}}>
+// CHECK-1D: vector.transfer_read {{.*}} : memref<16x12xf32, #{{.*}}>, vector<16x12xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32, #{{.*}}>
+// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32, #{{.*}}>, vector<8x12xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32, #{{.*}}>
//
-// CHECK: vector.contract
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
-// CHECK-SAME: : vector<8x16xf32>, vector<12x16xf32> into vector<8x12xf32>
+// CHECK-1D: vector.contract
+// CHECK-1D-SAME: iterator_types = ["parallel", "parallel", "reduction"]
+// CHECK-1D-SAME: : vector<8x16xf32>, vector<12x16xf32> into vector<8x12xf32>
//
-// CHECK: linalg.copy
+// CHECK-1D: vector.transfer_read {{.*}} : memref<8x12xf32, #{{.*}}>, vector<8x12xf32>
+// CHECK-1D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32, #{{.*}}>
+
+// CHECK-2D-LABEL:func @matmul
+// CHECK-2D: vector.transfer_write {{.*}} : vector<8x16xf32>, memref<8x16xf32>
+// CHECK-2D: vector.transfer_write {{.*}} : vector<16x12xf32>, memref<16x12xf32>
+// CHECK-2D: vector.transfer_write {{.*}} : vector<8x12xf32>, memref<8x12xf32>
+//
+// CHECK-2D: linalg.copy
+// CHECK-2D: linalg.copy
+// CHECK-2D: linalg.copy
+//
+// CHECK-2D: vector.contract
+// CHECK-2D-SAME: iterator_types = ["parallel", "parallel", "reduction"]
+// CHECK-2D-SAME: : vector<8x16xf32>, vector<12x16xf32> into vector<8x12xf32>
+//
+// CHECK-2D: linalg.copy
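
The RUN lines now use distinct FileCheck prefixes so that the 1-D and 2-D
pipelines can assert different output from the same input. With
-check-prefix=CHECK-1D, FileCheck honors only CHECK-1D, CHECK-1D-SAME and
CHECK-1D-LABEL directives and ignores the CHECK-2D block, and vice versa;
this is how the first RUN line can now require the linalg.copy ops to be
replaced by vector transfers while the second still expects them.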