[Mlir-commits] [mlir] d3fa067 - [mlir][Tensor] Use folded evaluators in tiling implementation of `tensor.pad`.
Mahesh Ravishankar
llvmlistbot at llvm.org
Tue Mar 14 20:29:29 PDT 2023
Author: Mahesh Ravishankar
Date: 2023-03-15T03:19:55Z
New Revision: d3fa067e699cf3ba4bc89f7d51d26f0faed7fa86
URL: https://github.com/llvm/llvm-project/commit/d3fa067e699cf3ba4bc89f7d51d26f0faed7fa86
DIFF: https://github.com/llvm/llvm-project/commit/d3fa067e699cf3ba4bc89f7d51d26f0faed7fa86.diff
LOG: [mlir][Tensor] Use folded evaluators in tiling implementation of `tensor.pad`.
Reviewed By: springerm
Differential Revision: https://reviews.llvm.org/D145135
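At its core, the patch replaces the Value-based `b.createOrFold<AffineApplyOp>` / `AffineMinOp` / `AffineMaxOp` helpers with the folded affine builders (`makeComposedFoldedAffineApply` and friends), which take and return `OpFoldResult`, so constant operands stay as attributes instead of being materialized as `arith.constant` ops. A minimal sketch of that pattern, assuming an `OpBuilder b` and `Location loc` are in scope (the operand values are illustrative, not taken from the patch):

    // Sketch only: the OpFoldResult-based folded builder this patch adopts.
    // (Declared in mlir/Dialect/Affine/IR/AffineOps.h.)
    AffineExpr d0, d1;
    bindDims(b.getContext(), d0, d1);
    auto addMap = AffineMap::get(2, 0, {d0 + d1});

    OpFoldResult lhs = b.getIndexAttr(4);
    OpFoldResult rhs = b.getIndexAttr(5);

    // Both operands are constant, so no affine.apply (and no arith.constant)
    // should be created; the result folds to the index attribute 9.
    OpFoldResult sum = makeComposedFoldedAffineApply(b, loc, addMap, {lhs, rhs});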
Added:
Modified:
mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
mlir/lib/Dialect/Tensor/Utils/Utils.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
index c1460c9c79d63..c0f33d15cb518 100644
--- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -26,6 +26,11 @@ PadOp createPadHighOp(RankedTensorType type, Value source, Value pad,
SmallVector<Value> createDynamicDimValues(OpBuilder &b, Location loc,
Value rankedTensor);
+// Returns the tensor extent along dimension `dim` if `rankedTensor` is of
+// `RankedTensorType`. Returns `failure()` otherwise.
+FailureOr<OpFoldResult> createDimValue(OpBuilder &b, Location loc,
+ Value rankedTensor, int64_t dim);
+
// Creates dim ops or constant ops for each dimension of the ranked tensor
// argument and returns these as values.
SmallVector<OpFoldResult> createDimValues(OpBuilder &b, Location loc,
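The new helper returns `FailureOr<OpFoldResult>`, so call sites that cannot already guarantee a `RankedTensorType` (and an in-range dimension) have to check for failure. A hedged usage sketch; `builder`, `loc`, and a tensor-typed `Value src` are assumed to be in scope and are not part of the patch:

    // Sketch only: consuming tensor::createDimValue from a helper that
    // itself returns FailureOr.
    FailureOr<OpFoldResult> dimSize =
        tensor::createDimValue(builder, loc, src, /*dim=*/1);
    if (failed(dimSize))
      return failure(); // not a RankedTensorType, or dim out of range
    // Static dimension: an IntegerAttr. Dynamic dimension: the result of a
    // (possibly folded) tensor.dim op.
    OpFoldResult size = *dimSize;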
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index ecebf21866dfa..f0e3cf36add87 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -459,25 +459,25 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
bindDims(b.getContext(), dim0, dim1);
// Add two integers.
auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
- auto add = [&](Value v1, Value v2) {
- return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+ auto add = [&](OpFoldResult v1, OpFoldResult v2) {
+ return makeComposedFoldedAffineApply(b, loc, addMap, {v1, v2});
};
// Subtract two integers.
auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
- auto sub = [&](Value v1, Value v2) {
- return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+ auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
+ return makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
};
// Take the minimum of two integers.
auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
- auto min = [&](Value v1, Value v2) {
- return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+ auto min = [&](OpFoldResult v1, OpFoldResult v2) {
+ return makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
};
// Take the maximum of two integers.
- auto max = [&](Value v1, Value v2) {
- return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+ auto max = [&](OpFoldResult v1, OpFoldResult v2) {
+ return makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
};
// Zero index-typed integer.
- auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
+ OpFoldResult zero = b.getIndexAttr(0);
// Helper function for filling static/dynamic low/high padding indices
// vectors of PadOp.
@@ -493,8 +493,7 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
// Compute new offsets, lengths, low padding, high padding.
SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
- SmallVector<Value> newLows, newHighs;
- SmallVector<int64_t> staticNewLows, staticNewHighs;
+ SmallVector<OpFoldResult> newLows, newHighs;
// Set to true if the original data source is not read at all.
bool hasZeroLen = false;
// Same as hasZeroLen, but for dynamic dimension sizes. This condition
@@ -503,23 +502,22 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
int64_t rank = padOp.getSourceType().getRank();
for (unsigned dim = 0; dim < rank; ++dim) {
- auto low =
- getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]);
- bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
- auto high =
- getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]);
- bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
- auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
- auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
- auto srcSize = b.createOrFold<tensor::DimOp>(loc, padOp.getSource(), dim);
+ auto low = padOp.getMixedLowPad()[dim];
+ bool hasLowPad = !isConstantIntValue(low, 0);
+ auto high = padOp.getMixedHighPad()[dim];
+ bool hasHighPad = !isConstantIntValue(high, 0);
+ auto offset = offsets[dim];
+ auto length = sizes[dim];
+ auto srcSize =
+ tensor::createDimValue(b, loc, padOp.getSource(), dim).value();
// The new amount of low padding is `low - offset`. Except for the case
// where none of the low padding is read. In that case, the new amount of
// low padding is zero.
//
// Optimization: If low = 0, then newLow = 0.
- Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
- appendIndex(newLow, newLows, staticNewLows);
+ OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+ newLows.push_back(newLow);
// Start reading the data from position `offset - low`. Since the original
// read may have started in the low padding zone, this value could be
@@ -533,9 +531,10 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
// no data from the source.)
//
// Optimization: If low = 0, then the formula can be simplified.
- Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
- : min(offset, srcSize);
- newOffsets.push_back(getAsOpFoldResult(newOffset));
+ OpFoldResult newOffset = hasLowPad
+ ? min(max(sub(offset, low), zero), srcSize)
+ : min(offset, srcSize);
+ newOffsets.push_back(newOffset);
// The original ExtractSliceOp was reading until position `offset +
// length`. Therefore, the corresponding position within the source tensor
@@ -556,19 +555,21 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
// The new ExtractSliceOp length is `endLoc - newOffset`.
//
// Optimization: If low = 0, then the formula can be simplified.
- Value endLoc = hasLowPad
- ? min(max(add(sub(offset, low), length), zero), srcSize)
- : min(add(offset, length), srcSize);
- Value newLength = sub(endLoc, newOffset);
- newLengths.push_back(getAsOpFoldResult(newLength));
+ OpFoldResult endLoc =
+ hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize)
+ : min(add(offset, length), srcSize);
+ OpFoldResult newLength = sub(endLoc, newOffset);
+ newLengths.push_back(newLength);
// Check if newLength is zero. In that case, no SubTensorOp should be
// executed.
- if (auto newLengthInt = getConstantIntValue(newLength)) {
- hasZeroLen |= *newLengthInt == 0;
- } else {
- Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
- newLength, zero);
+ if (isConstantIntValue(newLength, 0)) {
+ hasZeroLen = true;
+ } else if (!hasZeroLen) {
+ Value check = b.create<arith::CmpIOp>(
+ loc, arith::CmpIPredicate::eq,
+ getValueOrCreateConstantIndexOp(b, loc, newLength),
+ getValueOrCreateConstantIndexOp(b, loc, zero));
dynHasZeroLenCond =
dynHasZeroLenCond
? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
@@ -579,8 +580,9 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
// so that the result has the same length as the original ExtractSliceOp.
// As an optimization, if the original high padding is zero, then the new
// high padding must also be zero.
- Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
- appendIndex(newHigh, newHighs, staticNewHighs);
+ OpFoldResult newHigh =
+ hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+ newHighs.push_back(newHigh);
// Only unit stride supported.
newStrides.push_back(b.getIndexAttr(1));
@@ -594,7 +596,10 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
RankedTensorType::get(shape, padOp.getResultType().getElementType());
// Insert cast to ensure that types match. (May be folded away.)
- auto castResult = [&](Value val) -> Operation * {
+ auto castResult = [&](Operation *op) -> Operation * {
+ Value val = op->getResult(0);
+ if (resultType == val.getType())
+ return op;
return b.create<tensor::CastOp>(loc, resultType, val);
};
@@ -615,10 +620,9 @@ Operation *tensor::bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp,
// the result shape of the new SliceOp has a zero dimension.
auto createPadOfExtractSlice = [&]() {
// Create pad(extract_slice(x)).
- auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+ Value newSliceOp = b.create<tensor::ExtractSliceOp>(
loc, padOp.getSource(), newOffsets, newLengths, newStrides);
- auto newPadOp = b.create<PadOp>(loc, newSliceOp, staticNewLows,
- staticNewHighs, newLows, newHighs);
+ auto newPadOp = b.create<PadOp>(loc, Type(), newSliceOp, newLows, newHighs);
// Copy region to new PadOp.
IRMapping bvm;
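For reference, the builder used above, `b.create<PadOp>(loc, Type(), newSliceOp, newLows, newHighs)`, accepts the low/high padding directly as `ArrayRef<OpFoldResult>` and splits it into static attributes and dynamic values itself, which is why the parallel `staticNewLows`/`staticNewHighs` bookkeeping could be dropped. A hedged sketch of that builder in isolation, assuming `b`, `loc`, and a rank-2 tensor-typed `Value src` are in scope:

    // Sketch only: mixed static/dynamic padding passed as OpFoldResults.
    Value dynLow = b.create<tensor::DimOp>(loc, src, 0);
    SmallVector<OpFoldResult> lows = {OpFoldResult(dynLow), b.getIndexAttr(0)};
    SmallVector<OpFoldResult> highs = {b.getIndexAttr(2), b.getIndexAttr(0)};
    // A null result type (Type()) asks the builder to infer it from `src` and
    // the padding amounts; the updated castResult lambda above then inserts a
    // tensor.cast only when the inferred type differs from the expected one.
    auto pad = b.create<tensor::PadOp>(loc, /*resultType=*/Type(), src, lows,
                                       highs);
    // The pad op's body region (yielding the padding value) is populated
    // separately, as bubbleUpPadSlice does by cloning the original region.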
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
index ece3e72af2813..5b81295e1b91e 100644
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -52,6 +52,20 @@ SmallVector<Value> mlir::tensor::createDynamicDimValues(OpBuilder &b,
return dynamicDims;
}
+FailureOr<OpFoldResult> mlir::tensor::createDimValue(OpBuilder &b, Location loc,
+ Value rankedTensor,
+ int64_t dim) {
+ auto tensorTy = rankedTensor.getType().dyn_cast<RankedTensorType>();
+ if (!tensorTy)
+ return failure();
+ auto shape = tensorTy.getShape();
+ if (dim >= shape.size())
+ return failure();
+ if (ShapedType::isDynamic(shape[dim]))
+ return OpFoldResult(b.createOrFold<tensor::DimOp>(loc, rankedTensor, dim));
+ return OpFoldResult(b.getIndexAttr(shape[dim]));
+}
+
SmallVector<OpFoldResult>
mlir::tensor::createDimValues(OpBuilder &b, Location loc, Value rankedTensor) {
auto tensorTy = rankedTensor.getType().cast<RankedTensorType>();
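For completeness, a hedged sketch of what createDimValue yields for an illustrative rank-2 value `t` of type tensor<4x?xf32>, with `b` and `loc` assumed in scope:

    // Sketch only: static dims come back as attributes, dynamic dims as the
    // result of a (possibly folded) tensor.dim op.
    FailureOr<OpFoldResult> d0 = tensor::createDimValue(b, loc, t, 0);
    // *d0 is the index attribute 4; no IR is created for this dimension.
    FailureOr<OpFoldResult> d1 = tensor::createDimValue(b, loc, t, 1);
    // *d1 wraps the Value produced by a tensor.dim op on dimension 1.
    FailureOr<OpFoldResult> d2 = tensor::createDimValue(b, loc, t, 2);
    // d2 is failure(): dimension 2 is out of range for a rank-2 tensor.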