[Mlir-commits] [mlir] c3eb967 - [mlir][linalg][bufferize] Bufferize ops via PreOrder traversal
Matthias Springer
llvmlistbot at llvm.org
Wed Nov 10 01:51:51 PST 2021
Author: Matthias Springer
Date: 2021-11-10T18:51:39+09:00
New Revision: c3eb967e2ac8d29378ef3acf3110587055734037
URL: https://github.com/llvm/llvm-project/commit/c3eb967e2ac8d29378ef3acf3110587055734037
DIFF: https://github.com/llvm/llvm-project/commit/c3eb967e2ac8d29378ef3acf3110587055734037.diff
LOG: [mlir][linalg][bufferize] Bufferize ops via PreOrder traversal
The existing PostOrder traversal, which needed special rules to bufferize certain ops (scf::ForOp, TiledLoopOp) before their bodies, was complicated and had a bug. Switch to a PreOrder traversal, so that an op with regions is always bufferized before the ops in its blocks and block arguments are mapped before their uses.
Differential Revision: https://reviews.llvm.org/D113338
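
For reference, MLIR's Operation::walk visits nested ops post-order by default; passing WalkOrder::PreOrder visits an op before the ops inside its regions, which is what lets block arguments be mapped before their uses are bufferized. Below is a minimal stand-alone sketch of that pattern; the processOp callback is a hypothetical placeholder for the real bufferizeOp call, not part of this patch.

#include "mlir/IR/Operation.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"

using namespace mlir;

// Walk `root` pre-order: an op with regions (e.g. scf.for) is visited -- and
// can map its block arguments -- before the ops nested in its blocks.
static LogicalResult walkPreOrder(Operation *root,
                                  function_ref<LogicalResult(Operation *)> processOp) {
  WalkResult result = root->walk<WalkOrder::PreOrder>([&](Operation *op) {
    if (failed(processOp(op)))
      return WalkResult::interrupt(); // abort the traversal on the first error
    return WalkResult::advance();
  });
  return failure(result.wasInterrupted());
}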
Added:
Modified:
mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
index 914e9f1074e4..d285e530c204 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -1735,36 +1735,16 @@ static LogicalResult bufferizeFuncOpInternals(
LLVM_DEBUG(llvm::dbgs() << "\n\n");
LDBG("Begin BufferizeFuncOpInternals:\n" << funcOp << '\n');
OpBuilder b(funcOp->getContext());
- /// Start by bufferizing `funcOp` arguments.
+
+ // Start by bufferizing `funcOp` arguments.
if (failed(bufferize(b, funcOp, bvm, aliasInfo, allocationFns)))
return failure();
// Cannot erase ops during the traversal. Do that afterwards.
SmallVector<Operation *> toErase;
- // Bufferize the function body. `bufferizedOps` keeps track ops that were
- // already bufferized with pre-order traversal.
- DenseSet<Operation *> bufferizedOps;
- auto walkFunc = [&](Operation *op) -> WalkResult {
- // Collect ops that need to be bufferized before `op`.
- SmallVector<Operation *> preorderBufferize;
- Operation *parentOp = op->getParentOp();
- // scf::ForOp and TiledLoopOp must be bufferized before their blocks
- // ("pre-order") because BBargs must be mapped when bufferizing children.
- while (isa_and_nonnull<scf::ForOp, TiledLoopOp>(parentOp)) {
- if (bufferizedOps.contains(parentOp))
- break;
- bufferizedOps.insert(parentOp);
- preorderBufferize.push_back(parentOp);
- parentOp = parentOp->getParentOp();
- }
-
- for (Operation *op : llvm::reverse(preorderBufferize))
- if (failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
- &bufferizedFunctionTypes)))
- return failure();
- if (!bufferizedOps.contains(op) &&
- failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
+ auto walkFunc = [&](Operation *op) -> WalkResult {
+ if (failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
&bufferizedFunctionTypes)))
return failure();
@@ -1776,7 +1756,11 @@ static LogicalResult bufferizeFuncOpInternals(
return success();
};
- if (funcOp.walk(walkFunc).wasInterrupted())
+
+ // Bufferize ops pre-order, i.e., bufferize ops first, then their children.
+ // This is needed for ops with blocks that have BlockArguments. These must be
+ // mapped before bufferizing the children.
+ if (funcOp.walk<WalkOrder::PreOrder>(walkFunc).wasInterrupted())
return failure();
LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n');
@@ -2798,31 +2782,40 @@ struct IfOpInterface
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) const {
- auto ifOp = cast<scf::IfOp>(op);
-
- // Take a guard before anything else.
- OpBuilder::InsertionGuard g(b);
+ // scf::IfOp is bufferized after scf::YieldOp in the else branch.
+ return success();
+ }
+};
- for (OpResult opResult : ifOp->getResults()) {
- if (!opResult.getType().isa<TensorType>())
- continue;
- // TODO: Atm we bail on unranked TensorType because we don't know how to
- // alloc an UnrankedMemRefType + its underlying ranked MemRefType.
- assert(opResult.getType().isa<RankedTensorType>() &&
- "unsupported unranked tensor");
+/// Bufferize the scf::IfOp. This function is called after the YieldOp was
+/// bufferized.
+static LogicalResult bufferizeIfOp(scf::IfOp ifOp, OpBuilder &b,
+ BlockAndValueMapping &bvm,
+ BufferizationAliasInfo &aliasInfo,
+ AllocationCallbacks &allocationFn) {
+ // Take a guard before anything else.
+ OpBuilder::InsertionGuard g(b);
+ b.setInsertionPoint(ifOp);
- Value resultBuffer =
- getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn);
- if (!resultBuffer)
- return failure();
+ for (OpResult opResult : ifOp->getResults()) {
+ if (!opResult.getType().isa<TensorType>())
+ continue;
+ // TODO: Atm we bail on unranked TensorType because we don't know how to
+ // alloc an UnrankedMemRefType + its underlying ranked MemRefType.
+ assert(opResult.getType().isa<RankedTensorType>() &&
+ "unsupported unranked tensor");
- aliasInfo.createAliasInfoEntry(resultBuffer);
- map(bvm, opResult, resultBuffer);
- }
+ Value resultBuffer =
+ getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn);
+ if (!resultBuffer)
+ return failure();
- return success();
+ aliasInfo.createAliasInfoEntry(resultBuffer);
+ map(bvm, opResult, resultBuffer);
}
-};
+
+ return success();
+}
struct ForOpInterface
: public BufferizableOpInterface::ExternalModel<ForOpInterface,
@@ -2862,6 +2855,9 @@ struct ForOpInterface
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) const {
+ // Note: This method is just setting up the mappings for the block arguments
+ // and the result buffer. The op is bufferized after the scf::YieldOp.
+
auto forOp = cast<scf::ForOp>(op);
// Take a guard before anything else.
@@ -2893,6 +2889,39 @@ struct ForOpInterface
}
};
+/// Bufferize the scf::ForOp. This function is called after the YieldOp was
+/// bufferized.
+static LogicalResult bufferizeForOp(scf::ForOp forOp, OpBuilder &b,
+ BlockAndValueMapping &bvm,
+ BufferizationAliasInfo &aliasInfo,
+ AllocationCallbacks &allocationFn) {
+ auto yieldOp = cast<scf::YieldOp>(&forOp.region().front().back());
+ for (OpOperand &operand : yieldOp->getOpOperands()) {
+ auto tensorType = operand.get().getType().dyn_cast<TensorType>();
+ if (!tensorType)
+ continue;
+
+ OpOperand &forOperand = forOp.getOpOperandForResult(
+ forOp->getResult(operand.getOperandNumber()));
+ auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
+ Value yieldedBuffer = lookup(bvm, operand.get());
+ Value bbArgBuffer = lookup(bvm, bbArg);
+ if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer, bbArgBuffer)) {
+ // TODO: this could get resolved with copies but it can also turn into
+ // swaps so we need to be careful about order of copies.
+ return yieldOp->emitError()
+ << "Yield operand #" << operand.getOperandNumber()
+ << " does not bufferize to an equivalent buffer to the matching"
+ << " enclosing scf::for operand";
+ }
+
+ // Buffers are equivalent so the work is already done and we just yield
+ // the bbArg so that it later canonicalizes away.
+ operand.set(bbArg);
+ }
+ return success();
+}
+
struct YieldOpInterface
: public BufferizableOpInterface::ExternalModel<YieldOpInterface,
scf::YieldOp> {
@@ -2918,11 +2947,6 @@ struct YieldOpInterface
AllocationCallbacks &allocationFn) const {
auto yieldOp = cast<scf::YieldOp>(op);
- // Take a guard before anything else.
- OpBuilder::InsertionGuard g(b);
- // Cannot create IR past a yieldOp.
- b.setInsertionPoint(yieldOp);
-
if (auto execOp = dyn_cast<scf::ExecuteRegionOp>(yieldOp->getParentOp())) {
if (execOp->getNumResults() != 0)
return execOp->emitError(
@@ -2930,37 +2954,19 @@ struct YieldOpInterface
return success();
}
- if (auto ifOp = dyn_cast<scf::IfOp>(yieldOp->getParentOp()))
- return success();
-
- scf::ForOp forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp());
- if (!forOp)
- return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp");
- for (OpOperand &operand : yieldOp->getOpOperands()) {
- auto tensorType = operand.get().getType().dyn_cast<TensorType>();
- if (!tensorType)
- continue;
+ // Bufferize scf::IfOp after bufferizing the scf::YieldOp in the else
+ // branch.
+ if (auto ifOp = dyn_cast<scf::IfOp>(yieldOp->getParentOp())) {
+ if (ifOp.elseYield() != yieldOp)
+ return success();
+ return bufferizeIfOp(ifOp, b, bvm, aliasInfo, allocationFn);
+ }
- OpOperand &forOperand = forOp.getOpOperandForResult(
- forOp->getResult(operand.getOperandNumber()));
- auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
- Value yieldedBuffer = lookup(bvm, operand.get());
- Value bbArgBuffer = lookup(bvm, bbArg);
- if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer,
- bbArgBuffer)) {
- // TODO: this could get resolved with copies but it can also turn into
- // swaps so we need to be careful about order of copies.
- return yieldOp->emitError()
- << "Yield operand #" << operand.getOperandNumber()
- << " does not bufferize to an equivalent buffer to the matching"
- << " enclosing scf::for operand";
- }
+ // Bufferize scf::ForOp after bufferizing the scf::YieldOp.
+ if (auto forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp()))
+ return bufferizeForOp(forOp, b, bvm, aliasInfo, allocationFn);
- // Buffers are equivalent so the work is already done and we just yield
- // the bbArg so that it later canonicalizes away.
- operand.set(bbArg);
- }
- return success();
+ return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp");
}
};
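
Taken together, the hunks above change where scf.for and scf.if get bufferized: their own bufferize methods now only set up mappings (or do nothing), and the enclosing op is finalized when its scf.yield is bufferized; for scf.if, only the yield of the else branch triggers it. The following stand-alone toy model restates that dispatch with plain stand-in types (scf.execute_region handling omitted); none of this is the patch's actual API, it only mirrors the branching in YieldOpInterface::bufferize.

#include <iostream>

// Stand-ins for the parent-op kinds that the yield dispatch cares about.
enum class ParentKind { If, For, Other };

struct Yield {
  ParentKind parent;
  bool isElseYield; // only meaningful when parent == ParentKind::If
};

// Model of bufferizeIfOp / bufferizeForOp: the enclosing op is finalized
// here, after its yield has been processed.
void finalizeIf() { std::cout << "bufferize scf.if (after else-branch yield)\n"; }
void finalizeFor() { std::cout << "bufferize scf.for (after yield)\n"; }

// Mirrors the dispatch in YieldOpInterface::bufferize after this patch.
bool bufferizeYield(const Yield &yield) {
  if (yield.parent == ParentKind::If) {
    if (!yield.isElseYield)
      return true; // then-branch yield: the scf.if is not finalized yet
    finalizeIf();
    return true;
  }
  if (yield.parent == ParentKind::For) {
    finalizeFor();
    return true;
  }
  std::cerr << "expected scf::ForOp parent for scf::YieldOp\n";
  return false;
}

int main() {
  bufferizeYield({ParentKind::If, /*isElseYield=*/false});
  bufferizeYield({ParentKind::If, /*isElseYield=*/true});
  bufferizeYield({ParentKind::For, /*isElseYield=*/false});
}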
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
index 0706657d14b5..6edc7d1090c3 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir
@@ -160,7 +160,7 @@ func @mini_test_case1() -> tensor<10x20xf32> {
// -----
func @main() -> tensor<4xi32> {
- // expected-error @+1 {{expected result-less scf.execute_region containing op}}
+ // expected-error @+1 {{unsupported op with tensors}}
%r = scf.execute_region -> tensor<4xi32> {
%A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
scf.yield %A: tensor<4xi32>
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 184e0f36e271..564078ee0859 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -887,3 +887,33 @@ func @scf_if_inplace(%cond: i1,
}
return %r : tensor<?xf32>
}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inside_scf_for
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
+// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] {
+// CHECK: scf.if %{{.*}} {
+// CHECK: } else {
+// CHECK: vector.transfer_write
+// CHECK: }
+// CHECK: }
+func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1) -> tensor<?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor<?xf32>) {
+ %r2 = scf.if %cond -> (tensor<?xf32>) {
+ scf.yield %bb : tensor<?xf32>
+ } else {
+ %t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+ scf.yield %r2 : tensor<?xf32>
+ }
+ return %r : tensor<?xf32>
+}