[Mlir-commits] [mlir] d8ee28b - [mlir][Linalg] Extend buffer allocation to support Linalg init tensors
Nicolas Vasilache
llvmlistbot at llvm.org
Tue Oct 6 06:41:59 PDT 2020
Author: Nicolas Vasilache
Date: 2020-10-06T13:24:27Z
New Revision: d8ee28b96ee77a466aea5e9ca9c6ed57b2194b4d
URL: https://github.com/llvm/llvm-project/commit/d8ee28b96ee77a466aea5e9ca9c6ed57b2194b4d
DIFF: https://github.com/llvm/llvm-project/commit/d8ee28b96ee77a466aea5e9ca9c6ed57b2194b4d.diff
LOG: [mlir][Linalg] Extend buffer allocation to support Linalg init tensors
This revision adds support for init_tensors to buffer allocation for Linalg on tensors.
It currently assumes that the init_tensors fold onto the first output tensors.
This assumption is not yet enforced or cast in stone; it will require experimenting with tiling Linalg on tensors for ops **without reductions**.
Still, this allows progress towards the end-to-end goal.
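
To illustrate, here is a rough before/after sketch of the single-use init_tensor case, paraphrased from the @generic_with_init_tensor test added below (the #trait indexing maps are elided and the exact printed form may differ; treat this as illustrative rather than the pass's verbatim output):

  // Before bufferization: the generic op consumes %arg1 as an init_tensor
  // and produces a new tensor result.
  func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                                 %arg1: tensor<3x2xf32>) -> tensor<3x2xf32> {
    %0 = linalg.generic #trait
        ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
        init(%arg1 : tensor<3x2xf32>) {
      ^bb(%v0: vector<3x4xi4>, %v1: f32):
        %f0 = constant 0.0 : f32
        linalg.yield %f0 : f32
    } -> tensor<3x2xf32>
    return %0 : tensor<3x2xf32>
  }

  // After bufferization: %arg1 has a single use, so its buffer is reused
  // directly as the output buffer (no temporary alloc and copy); the result
  // is then copied into the out-argument added for the returned value.
  func @generic_with_init_tensor(%arg0: memref<2x3x4xvector<3x4xi4>>,
                                 %arg1: memref<3x2xf32>,
                                 %result: memref<3x2xf32>) {
    linalg.generic #trait
        ins(%arg0 : memref<2x3x4xvector<3x4xi4>>)
        outs(%arg1 : memref<3x2xf32>) {
      ^bb(%v0: vector<3x4xi4>, %v1: f32):
        %f0 = constant 0.0 : f32
        linalg.yield %f0 : f32
    }
    linalg.copy(%arg1, %result) : memref<3x2xf32>, memref<3x2xf32>
    return
  }

When the init tensor has more than one use (see @init_tensor_with_2_uses below), the pattern instead allocates a fresh buffer and copies the init buffer into it first, to avoid aliasing and write-after-read hazards.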
Added:
Modified:
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
mlir/test/Transforms/buffer-placement-preparation.mlir
mlir/test/lib/Transforms/TestBufferPlacement.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 082078dee3af..895085cf79cf 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -374,7 +374,6 @@ LogicalResult BlockArgsVerifier<IndexedGenericOp>::verify(IndexedGenericOp op,
template <typename GenericOpType>
static LogicalResult verifyGenericOp(GenericOpType op) {
- auto nInputViews = op.getNumInputs();
auto nLoops = op.getNumLoops();
if (op.inputs().size() + op.output_buffers().size() +
@@ -410,8 +409,7 @@ static LogicalResult verifyGenericOp(GenericOpType op) {
auto idx = en.index();
auto m = en.value().template cast<AffineMapAttr>().getValue();
indexingMaps.push_back(m); // Save reference to map for further checks.
- auto view = (idx < nInputViews) ? op.getInputShapedType(idx)
- : op.getOutputShapedType(idx - nInputViews);
+ auto view = op.getShapedType(idx);
if (m.getNumSymbols() != expectedNumSymbols)
return op.emitOpError("expected the number of symbols in indexing_map #")
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
index 7f671fc9f99e..b714a1f6c642 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
@@ -39,32 +39,50 @@ class GenericOpConverter
linalg::GenericOpAdaptor adaptor(operands,
op.getOperation()->getAttrDictionary());
- // TODO: support ops with reduction.
- if (!op.init_tensors().empty())
- return failure();
-
// All inputs need to be turned into buffers first. Until then, bail out.
if (llvm::any_of(adaptor.inputs(),
[](Value in) { return !in.getType().isa<MemRefType>(); }))
return failure();
+ // All init_tensors need to be turned into buffers first. Until then, bail
+ // out.
+ if (llvm::any_of(adaptor.init_tensors(),
+ [](Value in) { return !in.getType().isa<MemRefType>(); }))
+ return failure();
+
Location loc = op.getLoc();
- SmallVector<Value, 2> outputBuffers, newOutputBuffers;
- outputBuffers.assign(adaptor.output_buffers().begin(),
- adaptor.output_buffers().end());
+ SmallVector<Value, 2> newOutputBuffers;
newOutputBuffers.reserve(op.getNumOutputs());
newOutputBuffers.append(adaptor.output_buffers().begin(),
adaptor.output_buffers().end());
// Update all types to memref types.
- for (Type t : op.getResultTypes()) {
- auto type = t.cast<ShapedType>();
+ // Assume the init tensors fold onto the first results.
+ // TODO: update this assumption because the reality is more complex under
+ // linalg on tensor based transformations.
+ for (auto en : llvm::enumerate(op.getResultTypes())) {
+ auto type = en.value().cast<ShapedType>();
if (!type.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "dynamic shapes not currently supported");
auto memrefType = MemRefType::get(type.getShape(), type.getElementType());
- auto alloc = rewriter.create<AllocOp>(loc, memrefType);
- newOutputBuffers.push_back(alloc);
+ bool foldedInitTensor = en.index() < op.getNumInitTensors();
+ if (foldedInitTensor) {
+ // Dealing with an init tensor requires distinguishing between 1-use
+ // and many-use cases which would create aliasing and WAR hazards.
+ Value initTensor = op.getInitTensor(en.index());
+ Value initBuffer = adaptor.init_tensors()[en.index()];
+ if (initTensor.hasOneUse()) {
+ newOutputBuffers.push_back(initBuffer);
+ continue;
+ }
+ auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+ rewriter.create<linalg::CopyOp>(loc, initBuffer, alloc);
+ newOutputBuffers.push_back(alloc);
+ } else {
+ auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+ newOutputBuffers.push_back(alloc);
+ }
}
// Generate a new linalg operation that works on buffers.
@@ -82,8 +100,12 @@ class GenericOpConverter
Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
oldBlock.getArgumentTypes());
- // Add the result arguments to the new block.
- for (Value v : newOutputBuffers)
+ // Add the result arguments that do not come from init_tensors to the new
+ // block.
+ // TODO: update this assumption because the reality is more complex under
+ // linalg on tensor based transformations.
+ for (Value v :
+ ValueRange(newOutputBuffers).drop_front(adaptor.init_tensors().size()))
newBlock->addArgument(v.getType().cast<MemRefType>().getElementType());
// Clone the body of the old block to the new block.
diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir b/mlir/test/Transforms/buffer-placement-preparation.mlir
index 4fcd225abc7e..ac3ec1246211 100644
--- a/mlir/test/Transforms/buffer-placement-preparation.mlir
+++ b/mlir/test/Transforms/buffer-placement-preparation.mlir
@@ -382,3 +382,141 @@ func @decompose_tuple_typed_function_args_and_results(%arg0: tuple<i1,f32>, %arg
// CHECK-NEXT: linalg.copy(%[[SECOND_TUPLE_SECOND_ELEM]], %[[RESULT0]])
// CHECK-NEXT: linalg.copy(%[[ARG2]], %[[RESULT1]])
// CHECK-NEXT: return %[[SECOND_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_FIRST_ELEM]], %[[FIRST_TUPLE_SECOND_ELEM]]
+
+// -----
+
+#accesses = [
+ affine_map<(i, j, k) -> (j, i, k)>,
+ affine_map<(i, j, k) -> (i, j)>
+]
+
+#trait = {
+ indexing_maps = #accesses,
+ iterator_types = ["parallel", "parallel", "reduction"]
+}
+
+func @generic_with_init_tensor(
+ %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
+
+ %0 = linalg.generic #trait
+ ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
+ init(%arg1 : tensor<3x2xf32>) {
+ ^bb(%v0: vector<3x4xi4>, %v1: f32) :
+ %f0 = constant 0.0 : f32
+ linalg.yield %f0 : f32
+ } -> tensor<3x2xf32>
+
+ return %0 : tensor<3x2xf32>
+}
+// CHECK-LABEL: func @generic_with_init_tensor
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) {
+// CHECK-NEXT: linalg.generic
+// CHECK: linalg.copy(%[[ARG1]], %[[RESULT0]])
+// CHECK-NEXT: return
+// CHECK-NOT: %
+
+// -----
+
+#accesses = [
+ affine_map<(i, j, k) -> (j, i, k)>,
+ affine_map<(i, j, k) -> (i, j)>
+]
+
+#trait = {
+ indexing_maps = #accesses,
+ iterator_types = ["parallel", "parallel", "reduction"]
+}
+
+func @init_tensor_with_2_uses(
+ %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>, tensor<3x2xf32>) {
+
+ %0 = linalg.generic #trait
+ ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
+ init(%arg1 : tensor<3x2xf32>) {
+ ^bb(%v0: vector<3x4xi4>, %v1: f32) :
+ %f0 = constant 0.0 : f32
+ linalg.yield %f0 : f32
+ } -> tensor<3x2xf32>
+
+ %1 = linalg.generic #trait
+ ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
+ init(%arg1 : tensor<3x2xf32>) {
+ ^bb(%v0: vector<3x4xi4>, %v1: f32) :
+ %f0 = constant 0.0 : f32
+ linalg.yield %f0 : f32
+ } -> tensor<3x2xf32>
+
+ return %0, %1 : tensor<3x2xf32>, tensor<3x2xf32>
+}
+// CHECK-LABEL: func @init_tensor_with_2_uses
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>, %[[RESULT1:.*]]: memref<3x2xf32>) {
+// CHECK-NEXT: %[[ALLOC0:.*]] = alloc
+// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]])
+// CHECK-NEXT: linalg.generic
+// CHECK-SAME: outs(%[[ALLOC0]]
+// CHECK-NEXT: ^bb
+// CHECK-NEXT: constant
+// CHECK-NEXT: yield
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[ALLOC1:.*]] = alloc
+// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC1]])
+// CHECK-NEXT: linalg.generic
+// CHECK-SAME: outs(%[[ALLOC1]]
+// CHECK-NEXT: ^bb
+// CHECK-NEXT: constant
+// CHECK-NEXT: yield
+// CHECK-NEXT: }
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[RESULT0]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[RESULT1]])
+// CHECK-NEXT: return
+// CHECK-NOT: %
+
+// -----
+
+#accesses = [
+ affine_map<(i, j, k) -> (j, i, k)>,
+ affine_map<(i, j, k) -> (i, j)>
+]
+
+#trait = {
+ indexing_maps = #accesses,
+ iterator_types = ["parallel", "parallel", "reduction"]
+}
+
+func @init_tensor_with_1_use_def_chain(
+ %arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
+
+ %0 = linalg.generic #trait
+ ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
+ init(%arg1 : tensor<3x2xf32>) {
+ ^bb(%v0: vector<3x4xi4>, %v1: f32) :
+ %f0 = constant 0.0 : f32
+ linalg.yield %f0 : f32
+ } -> tensor<3x2xf32>
+
+ %1 = linalg.generic #trait
+ ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
+ init(%0 : tensor<3x2xf32>) {
+ ^bb(%v0: vector<3x4xi4>, %v1: f32) :
+ %f0 = constant 0.0 : f32
+ linalg.yield %f0 : f32
+ } -> tensor<3x2xf32>
+
+ return %1 : tensor<3x2xf32>
+}
+// CHECK-LABEL: func @init_tensor_with_1_use_def_chain
+// CHECK-SAME: (%[[ARG0:.*]]: memref<2x3x4xvector<3x4xi4>>, %[[ARG1:.*]]: memref<3x2xf32>, %[[RESULT0:.*]]: memref<3x2xf32>) {
+// CHECK-NEXT: linalg.generic
+// CHECK-NEXT: ^bb
+// CHECK-NEXT: constant
+// CHECK-NEXT: yield
+// CHECK-NEXT: }
+// CHECK-NEXT: linalg.generic
+// CHECK-NEXT: ^bb
+// CHECK-NEXT: constant
+// CHECK-NEXT: yield
+// CHECK-NEXT: }
+// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[RESULT0]])
+// CHECK-NEXT: return
+// CHECK-NOT: %
+
diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp
index dd6629e80a93..3b31ac0d1a70 100644
--- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp
+++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp
@@ -56,34 +56,53 @@ struct TestBufferPlacementPreparationPass
linalg::GenericOpAdaptor adaptor(operands,
op.getOperation()->getAttrDictionary());
- // TODO: support ops with reduction.
- if (!op.init_tensors().empty())
- return failure();
-
// All inputs need to be turned into buffers first. Until then, bail out.
if (llvm::any_of(adaptor.inputs(), [](Value in) {
return !in.getType().isa<MemRefType>();
}))
return failure();
+ // All init_tensors need to be turned into buffers first. Until then, bail
+ // out.
+ if (llvm::any_of(adaptor.init_tensors(), [](Value in) {
+ return !in.getType().isa<MemRefType>();
+ }))
+ return failure();
+
Location loc = op.getLoc();
- SmallVector<Value, 2> outputBuffers, newOutputBuffers;
- outputBuffers.assign(adaptor.output_buffers().begin(),
- adaptor.output_buffers().end());
+ SmallVector<Value, 2> newOutputBuffers;
newOutputBuffers.reserve(op.getNumOutputs());
newOutputBuffers.append(adaptor.output_buffers().begin(),
adaptor.output_buffers().end());
// Update all types to memref types.
- for (Type t : op.getResultTypes()) {
- auto type = t.cast<ShapedType>();
+ // Assume the init tensors fold onto the first results.
+ // TODO: update this assumption because the reality is more complex under
+ // linalg on tensor based transformations.
+ for (auto en : llvm::enumerate(op.getResultTypes())) {
+ auto type = en.value().cast<ShapedType>();
if (!type.hasStaticShape())
return rewriter.notifyMatchFailure(
op, "dynamic shapes not currently supported");
auto memrefType =
MemRefType::get(type.getShape(), type.getElementType());
- auto alloc = rewriter.create<AllocOp>(loc, memrefType);
- newOutputBuffers.push_back(alloc);
+ bool foldedInitTensor = en.index() < op.getNumInitTensors();
+ if (foldedInitTensor) {
+ // Dealing with an init tensor requires distinguishing between 1-use
+ // and many-use cases which would create aliasing and WAR hazards.
+ Value initTensor = op.getInitTensor(en.index());
+ Value initBuffer = adaptor.init_tensors()[en.index()];
+ if (initTensor.hasOneUse()) {
+ newOutputBuffers.push_back(initBuffer);
+ continue;
+ }
+ auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+ rewriter.create<linalg::CopyOp>(loc, initBuffer, alloc);
+ newOutputBuffers.push_back(alloc);
+ } else {
+ auto alloc = rewriter.create<AllocOp>(loc, memrefType);
+ newOutputBuffers.push_back(alloc);
+ }
}
// Generate a new linalg operation that works on buffers.
@@ -101,8 +120,12 @@ struct TestBufferPlacementPreparationPass
Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(),
oldBlock.getArgumentTypes());
- // Add the result arguments to the new block.
- for (Value v : newOutputBuffers)
+ // Add the result arguments that do not come from init_tensors to the new
+ // block.
+ // TODO: update this assumption because the reality is more complex under
+ // linalg on tensor based transformations.
+ for (Value v : ValueRange(newOutputBuffers)
+ .drop_front(adaptor.init_tensors().size()))
newBlock->addArgument(v.getType().cast<MemRefType>().getElementType());
// Clone the body of the old block to the new block.