[Mlir-commits] [mlir] 8b5236d - [mlir][linalg] Simplify slice dim computation for fusion on tensors (NFC).
Tobias Gysi
llvmlistbot at llvm.org
Tue Sep 21 08:16:57 PDT 2021
Author: Tobias Gysi
Date: 2021-09-21T15:09:46Z
New Revision: 8b5236def5a1adc55f649f125d2582254b80f699
URL: https://github.com/llvm/llvm-project/commit/8b5236def5a1adc55f649f125d2582254b80f699
DIFF: https://github.com/llvm/llvm-project/commit/8b5236def5a1adc55f649f125d2582254b80f699.diff
LOG: [mlir][linalg] Simplify slice dim computation for fusion on tensors (NFC).
Compute the tiled producer slice dimensions directly starting from the consumer, not using the producer at all.
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D110147
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
index 13a6fe4fcc90..973d6136738f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp
@@ -30,61 +30,26 @@ using namespace linalg;
// StructuredOp specific helpers.
//===----------------------------------------------------------------------===//
-/// Relate the producer to the consumer loop iterations that access the same
-/// producer result element:
-/// consumerToProducerLoops =
-/// inverse(producerIndexingMap).compose(consumerIndexingMap).
-/// Return `consumerToProducerLoops` or none if the inversion fails.
-static Optional<AffineMap>
-getConsumerToProducerLoopsMap(AffineMap producerIndexingMap,
- AffineMap consumerIndexingMap) {
- assert(consumerIndexingMap.getNumResults() ==
- producerIndexingMap.getNumResults() &&
- "expect the number of indexing map results to match");
- // Ensure the producer indexing map is a projected permutation.
- if (!producerIndexingMap.isProjectedPermutation())
- return None;
- AffineMap inverseIndexingMap =
- inverseAndBroadcastProjectedPermuation(producerIndexingMap);
- return inverseIndexingMap.compose(consumerIndexingMap);
-}
-
-/// Returns the producer result slice dimensions tiled by the tile loop nest or
-/// an empty vector if `getConsumerToProducerLoopsMap` returns none.
-// TODO: replace by Fourier-Motzkin and/or compute starting from consumer.
-SmallVector<int64_t> getTiledSliceDims(OpResult producerResult,
- OpOperand *consumerOperand,
+/// Returns the tiled slice dimensions given the tiled consumer loop dimensions.
+/// The slice defines a hyper rectangular iteration space and fusing the
+/// producer is always possible. However, depending on the consumer indexing
+/// map, not all slice elements may be consumed and the tiles may overlap. In
+/// these cases, fusion introduces redundant computation.
+SmallVector<int64_t> getTiledSliceDims(OpOperand *consumerOperand,
ArrayRef<int64_t> tiledLoopDims) {
+ // Get the consumer operand indexing map.
LinalgOp consumerOp = consumerOperand->getOwner();
- LinalgOp producerOp = producerResult.getOwner();
- OpOperand *opOperand =
- producerOp.getOutputOperand(producerResult.getResultNumber());
-
- // Compute the `consumerToProducerLoopsMap` and exit if the computation fails.
- AffineMap producerIndexingMap = producerOp.getTiedIndexingMap(opOperand);
- Optional<AffineMap> consumerToProducerLoopsMap =
- getConsumerToProducerLoopsMap(
- producerIndexingMap, consumerOp.getTiedIndexingMap(consumerOperand));
- if (!consumerToProducerLoopsMap.hasValue())
- return {};
-
- // Compute the set of tiled producer loops.
- DenseSet<int64_t> tiledProducerLoops;
- for (auto en : enumerate(consumerToProducerLoopsMap->getResults())) {
- for (int64_t dim : tiledLoopDims) {
- if (en.value().isFunctionOfDim(dim))
- tiledProducerLoops.insert(en.index());
+ AffineMap indexingMap = consumerOp.getTiedIndexingMap(consumerOperand);
+
+ // Search the slice dimensions tiled by a tile loop dimension.
+ DenseSet<int64_t> tiledSliceDims;
+ for (auto en : enumerate(indexingMap.getResults())) {
+ for (auto tiledLoopDim : tiledLoopDims) {
+ if (en.value().isFunctionOfDim(tiledLoopDim))
+ tiledSliceDims.insert(en.index());
}
}
-
- // Compute the slice dimensions for the tiled producer loops.
- SmallVector<int64_t> tiledSliceDims;
- for (auto en : enumerate(producerIndexingMap.getResults())) {
- auto dimExpr = en.value().dyn_cast<AffineDimExpr>();
- if (dimExpr && tiledProducerLoops.count(dimExpr.getPosition()) != 0)
- tiledSliceDims.push_back(en.index());
- }
- return tiledSliceDims;
+ return {tiledSliceDims.begin(), tiledSliceDims.end()};
}
/// Returns the producer fused in place of `sliceOp`. Tile the producer operands
@@ -332,9 +297,10 @@ FailureOr<LinalgOp> TileLoopNest::fuseProducer(OpBuilder &b,
if (!producerResult || !isa<LinalgOp>(producerResult.getOwner()))
return failure();
- // Compute the slice dimensions tiled by `tileLoopNest`.
+ // Compute the tiled producer slice dimensions given the tiled root operation
+ // loop dimensions `loopDims`.
SmallVector<int64_t> tiledSliceDims =
- getTiledSliceDims(producerResult, rootOpOperand, loopDims);
+ getTiledSliceDims(rootOpOperand, loopDims);
if (tiledSliceDims.empty())
return failure();
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
index 1a4ca86aa81d..8b430134eb2f 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
@@ -230,3 +230,39 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
return %1 : tensor<24x25xi32>
}
+// -----
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)>
+#map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
+#map1 = affine_map<(d0, d1) -> (d0, d1)>
+
+// CHECK: fuse_non_rectangular
+// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<10x18xf32>
+func @fuse_non_rectangular(%arg0: tensor<10x18xf32>,
+ %arg1: tensor<10x8xf32>) -> tensor<10x8xf32> {
+ %cst = constant 0.000000e+00 : f32
+ %0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32>
+
+ // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
+ // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
+
+ // Compute producer on a hyper rectangular bounding box. Along the second dimension,
+ // the offset is set to the sum of the induction variables and the upper bound
+ // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes)
+ // minus the induction variables.
+ // CHECK: %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]]
+ // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]]
+ // CHECK: %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]]
+ // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
+ // CHECK-SAME: %[[IV1]], %[[SUM]]
+ // CHECK-SAME: , %[[UB1]]
+ // CHECK: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
+ %1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<10x18xf32>) outs(%arg1 : tensor<10x8xf32>) {
+ ^bb0(%arg2: f32, %arg3: f32): // no predecessors
+ %2 = addf %arg2, %arg3 : f32
+ linalg.yield %2 : f32
+ } -> tensor<10x8xf32>
+ return %1 : tensor<10x8xf32>
+}
More information about the Mlir-commits
mailing list