[Mlir-commits] [mlir] ad7c49b - [mlir][linalg] Fix padding size calculation for Conv2d ops.
Hanhan Wang
llvmlistbot at llvm.org
Mon May 9 08:46:07 PDT 2022
Author: Jerry Wu
Date: 2022-05-09T08:45:37-07:00
New Revision: ad7c49bef774191304b804e60821c0cdb6a390cb
URL: https://github.com/llvm/llvm-project/commit/ad7c49bef774191304b804e60821c0cdb6a390cb
DIFF: https://github.com/llvm/llvm-project/commit/ad7c49bef774191304b804e60821c0cdb6a390cb.diff
LOG: [mlir][linalg] Fix padding size calculation for Conv2d ops.
This patch fixed the padding size calculation for Conv2d ops when the stride > 1. It contains the changes below:
- Use addBound to add constraint for AffineApplyOp in getUpperBoundForIndex. So the result value can be mapped and retrieved later.
- Fixed the bound from AffineMinOp by adding as a closed bound. Originally the bound was added as an open upper bound, which results in the incorrect bounds when we multiply the values. For example:
```
%0 = affine.min affine_map<()[s0] -> (4, -s0 + 11)>()[iv0]
%1 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%0]
If we add the affine.min as an open bound, addBound will internally transform it into the close bound "%0 <= 3". The following sliceBounds will derive the bound of %1 as "%1 <= 6" and return the open bound "%1 < 7", while the correct bound should be "%1 <= 8".
```
- In addition to addBound, I also changed sliceBounds to support returning closed upper bound, since for the size computation, we usually care about the closed bounds.
- Change the getUpperBoundForIndex to favor constant bounds when required. The sliceBounds will return a tighter but non-constant bounds, which can't be used for padding. The constantRequired option requires getUpperBoundForIndex to get the constant bounds when possible.
Reviewed By: hanchung
Differential Revision: https://reviews.llvm.org/D124821
Added:
Modified:
mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/test/Dialect/Linalg/pad.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
index 875e405431961..427d96fef23a6 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
@@ -166,8 +166,25 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron {
/// bound map is expected to have exactly one result. In case of a LB/UB, the
/// bound map may have more than one result, for each of which an inequality
/// is added.
+ ///
+ /// The bound can be added as open or closed by specifying isClosedBound. In
+ /// case of a LB/UB, isClosedBound = false means the bound is added internally
+ /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can
+ /// only be added as a closed bound.
+ ///
/// Note: The dimensions/symbols of this FlatAffineConstraints must match the
/// dimensions/symbols of the affine map.
+ LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap,
+ bool isClosedBound);
+
+ /// Adds a bound for the identifier at the specified position with constraints
+ /// being drawn from the specified bound map. In case of an EQ bound, the
+ /// bound map is expected to have exactly one result. In case of a LB/UB, the
+ /// bound map may have more than one result, for each of which an inequality
+ /// is added.
+ /// Note: The dimensions/symbols of this FlatAffineConstraints must match the
+ /// dimensions/symbols of the affine map. By default the lower bound is closed
+ /// and the upper bound is open.
LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap);
/// Adds a bound for the identifier at the specified position with constraints
@@ -197,9 +214,13 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron {
/// identifiers as floordiv's and mod's of affine expressions of other
/// identifiers with respect to (positive) constants. Sets bound map to a
/// null AffineMap if such a bound can't be found (or yet unimplemented).
+ ///
+ /// By default the returned lower bounds are closed and upper bounds are open.
+ /// This can be changed by getClosedUB.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps,
- SmallVectorImpl<AffineMap> *ubMaps);
+ SmallVectorImpl<AffineMap> *ubMaps,
+ bool getClosedUB = false);
/// Composes an affine map whose dimensions and symbols match one to one with
/// the dimensions and symbols of this FlatAffineConstraints. The results of
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index bd427ea3d9e50..3e9d07209017c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -50,6 +50,9 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);
/// values. The method sets `boundMap` to an affine map that given
/// `boundOperands` evaluates to an upper bound for the index computation.
///
+/// If constantRequired is true, only returns the constant bounds (potentially
+/// over-approximating) and fails when not possible.
+///
/// Example:
/// ```
/// %dim0 = dim %tensor, %c0
@@ -62,7 +65,8 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);
/// - boundMap = affine.map<(d0) -> (d0 + 40)>
/// - boundOperands = [%dim1]
void getUpperBoundForIndex(Value value, AffineMap &boundMap,
- SmallVectorImpl<Value> &boundOperands);
+ SmallVectorImpl<Value> &boundOperands,
+ bool constantRequired = false);
/// Returns a constant upper bound for the result `value` of an index
/// computation. Calls `getUpperBoundForIndex` and returns a constant upper
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
index e31dcbd7498d1..619d9c7a9e7fc 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -965,7 +965,8 @@ FlatAffineValueConstraints::getLowerAndUpperBound(
/// this process if needed.
void FlatAffineValueConstraints::getSliceBounds(
unsigned offset, unsigned num, MLIRContext *context,
- SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps) {
+ SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
+ bool getClosedUB) {
assert(num < getNumDimIds() && "invalid range");
// Basic simplification.
@@ -1065,6 +1066,8 @@ void FlatAffineValueConstraints::getSliceBounds(
// again.
} while (changed);
+ int64_t ubAdjustment = getClosedUB ? 0 : 1;
+
// Set the lower and upper bound maps for all the identifiers that were
// computed as affine expressions of the rest as the "detected expr" and
// "detected expr + 1" respectively; set the undetected ones to null.
@@ -1081,7 +1084,7 @@ void FlatAffineValueConstraints::getSliceBounds(
if (expr) {
lbMap = AffineMap::get(numMapDims, numMapSymbols, expr);
- ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + 1);
+ ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment);
} else {
// TODO: Whenever there are local identifiers in the dependence
// constraints, we'll conservatively over-approximate, since we don't
@@ -1118,9 +1121,10 @@ void FlatAffineValueConstraints::getSliceBounds(
<< "WARNING: Potentially over-approximating slice ub\n");
auto ubConst = getConstantBound(BoundType::UB, pos + offset);
if (ubConst.hasValue()) {
- (ubMap) = AffineMap::get(
- numMapDims, numMapSymbols,
- getAffineConstantExpr(ubConst.getValue() + 1, context));
+ ubMap =
+ AffineMap::get(numMapDims, numMapSymbols,
+ getAffineConstantExpr(
+ ubConst.getValue() + ubAdjustment, context));
}
}
}
@@ -1158,10 +1162,13 @@ LogicalResult FlatAffineValueConstraints::flattenAlignedMapAndMergeLocals(
}
LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
- AffineMap boundMap) {
+ AffineMap boundMap,
+ bool isClosedBound) {
assert(boundMap.getNumDims() == getNumDimIds() && "dim mismatch");
assert(boundMap.getNumSymbols() == getNumSymbolIds() && "symbol mismatch");
assert(pos < getNumDimAndSymbolIds() && "invalid position");
+ assert((type != BoundType::EQ || isClosedBound) &&
+ "EQ bound must be closed.");
// Equality follows the logic of lower bound except that we add an equality
// instead of an inequality.
@@ -1193,17 +1200,24 @@ LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) {
ineq[j] = lower ? -flatExpr[i] : flatExpr[i];
}
+ // Make the bound closed in if flatExpr is open. The inequality is always
+ // created in the upper bound form, so the adjustment is -1.
+ int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1;
// Constant term.
- ineq[getNumCols() - 1] =
- lower ? -flatExpr[flatExpr.size() - 1]
- // Upper bound in flattenedExpr is an exclusive one.
- : flatExpr[flatExpr.size() - 1] - 1;
+ ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1]
+ : flatExpr[flatExpr.size() - 1]) +
+ boundAdjustment;
type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq);
}
return success();
}
+LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
+ AffineMap boundMap) {
+ return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB);
+}
+
AffineMap
FlatAffineValueConstraints::computeAlignedMap(AffineMap map,
ValueRange operands) const {
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 87af3b9e26a24..bfd2c68cfa68a 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -177,7 +177,8 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
}
void getUpperBoundForIndex(Value value, AffineMap &boundMap,
- SmallVectorImpl<Value> &boundOperands) {
+ SmallVectorImpl<Value> &boundOperands,
+ bool constantRequired) {
// Initialize `boundMap` and `boundOperands` to the identity returning
// `value`. This combination is the default result of the method if no
// simplification is possible.
@@ -226,11 +227,13 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
llvm::all_of(op->getOperands(), findOrCreateId)))
return;
+
// Add AffineApplyOps to the constraints.
if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
- AffineValueMap valueMap(applyOp.getAffineMap(), applyOp.getOperands(),
- applyOp.getResult());
- if (failed(constraints.composeMap(&valueMap)))
+ AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(),
+ applyOp.getOperands());
+ if (failed(constraints.addBound(IntegerPolyhedron::EQ,
+ getPosition(applyOp.getResult()), map)))
return;
continue;
}
@@ -239,17 +242,30 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
minOp.getOperands());
if (failed(constraints.addBound(IntegerPolyhedron::UB,
- getPosition(minOp.getResult()), map)))
+ getPosition(minOp.getResult()), map,
+ /*isClosedBound=*/true)))
return;
}
// Obtain an upper bound for the affine index computation by projecting out
// all temporary results and expressing the upper bound for `value` in terms
// of the terminals of the index computation.
- SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
- constraints.getSliceBounds(getPosition(value), 1, value.getContext(),
- &lowerBounds, &upperBounds);
+ unsigned pos = getPosition(value);
+ if (constantRequired) {
+ auto ubConst = constraints.getConstantBound(
+ FlatAffineValueConstraints::BoundType::UB, pos);
+ if (!ubConst.hasValue())
+ return;
+ boundMap =
+ AffineMap::getConstantMap(ubConst.getValue(), value.getContext());
+ return;
+ }
+
+ SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
+ constraints.getSliceBounds(pos, 1, value.getContext(), &lowerBounds,
+ &upperBounds,
+ /*getClosedUB=*/true);
// Verify `upperBounds[0]` is valid and has at least one result.
if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
return;
@@ -265,7 +281,8 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
// Compute an upper bound for `value`.
AffineMap boundMap;
SmallVector<Value> boundOperands;
- getUpperBoundForIndex(value, boundMap, boundOperands);
+ getUpperBoundForIndex(value, boundMap, boundOperands,
+ /*constantRequired=*/true);
// Search the results of `boundMap` for constant upper bounds.
SmallVector<int64_t> constantBounds;
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index e37f0fc06f2d1..0e0e2e1066d6e 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -3,6 +3,7 @@
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=PARTIAL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.depthwise_conv_2d_nhwc_hwc pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=1,2 pack-paddings=1,0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=DEPTHWISE_CONV_2D
// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -535,3 +536,65 @@ func.func @padding_the_output_dims_only(%arg0: tensor<24x12xf32>,
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}
+
+// -----
+
+// DEPTHWISE_CONV_2D-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (4, -s0 + 11)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2 + 1)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * -2 + 8)>
+// DEPTHWISE_CONV_2D-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 4)>
+
+#map0 = affine_map<()[s0] -> (4, -s0 + 11)>
+#map1 = affine_map<()[s0] -> (s0 * 2)>
+#map2 = affine_map<()[s0] -> (s0 * 2 + 1)>
+
+// DEPTHWISE_CONV_2D: depthwise_conv_2d_padding
+// DEPTHWISE_CONV_2D-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x23x3x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<3x3x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<1x13x1x16xf32>
+// DEPTHWISE_CONV_2D-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+func.func @depthwise_conv_2d_padding(%arg0: tensor<1x23x3x16xf32>,
+ %arg1: tensor<3x3x16xf32>,
+ %arg2: tensor<1x13x1x16xf32>,
+ %iv0: index) -> tensor<1x?x1x16xf32> {
+ // DEPTHWISE_CONV_2D-DAG: %[[CST:.*]] = arith.constant 0.
+ // DEPTHWISE_CONV_2D-DAG: %[[C0:.*]] = arith.constant 0 : index
+ // DEPTHWISE_CONV_2D-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[IV0]]]
+ %0 = affine.min #map0()[%iv0]
+ %1 = affine.apply #map1()[%iv0]
+ %2 = affine.apply #map2()[%0]
+
+ // DEPTHWISE_CONV_2D: %[[T3:.*]] = tensor.extract_slice %[[ARG0]]
+ // DEPTHWISE_CONV_2D: %[[T4:.*]] = tensor.extract_slice %[[ARG2]]
+ %3 = tensor.extract_slice %arg0[0, %1, 0, 0] [1, %2, 3, 16] [1, 1, 1, 1] : tensor<1x23x3x16xf32> to tensor<1x?x3x16xf32>
+ %4 = tensor.extract_slice %arg2[0, %iv0, 0, 0] [1, %0, 1, 16] [1, 1, 1, 1] : tensor<1x13x1x16xf32> to tensor<1x?x1x16xf32>
+
+ // Check the padding on the input.
+ // DEPTHWISE_CONV_2D: %[[T5:.*]] = affine.apply #[[MAP3]]()[%[[T0]]]
+ // DEPTHWISE_CONV_2D: %[[T6:.*]] = tensor.pad %[[T3]]
+ // DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]]
+ // DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T5]], %[[C0]], %[[C0]]]
+ // DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32
+
+ // Check the padding on the output.
+ // DEPTHWISE_CONV_2D: %[[T7:.*]] = affine.apply #[[MAP4]]()[%[[T0]]]
+ // DEPTHWISE_CONV_2D: %[[T8:.*]] = tensor.pad %[[T4]]
+ // DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]]
+ // DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T7]], %[[C0]], %[[C0]]]
+ // DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32
+
+ // DEPTHWISE_CONV_2D: %[[T9:.*]] = linalg.depthwise_conv_2d_nhwc_hwc
+ // DEPTHWISE_CONV_2D-SAME: ins(%[[T6]], %[[ARG1]] : tensor<1x9x3x16xf32>, tensor<3x3x16xf32>)
+ // DEPTHWISE_CONV_2D-SAME: outs(%[[T8]] : tensor<1x4x1x16xf32>)
+ %5 = linalg.depthwise_conv_2d_nhwc_hwc
+ {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
+ ins(%3, %arg1 : tensor<1x?x3x16xf32>, tensor<3x3x16xf32>)
+ outs(%4 : tensor<1x?x1x16xf32>) -> tensor<1x?x1x16xf32>
+
+ // Check the extract_slice to crop the padded output before return.
+ // DEPTHWISE_CONV_2D: %[[T10:.*]] = tensor.extract_slice %[[T9]][0, 0, 0, 0]
+ // DEPTHWISE_CONV_2D-SAME: [1, %[[T0]], 1, 16]
+ // DEPTHWISE_CONV_2D: return %[[T10]] : tensor<1x?x1x16xf32>
+ return %5 : tensor<1x?x1x16xf32>
+}
More information about the Mlir-commits
mailing list