[Mlir-commits] [mlir] e4dd964 - [mlir] Loop bounds inference in linalg.generic op improved to support bounds for convolution
Alex Zinenko
llvmlistbot at llvm.org
Thu Jul 23 02:02:03 PDT 2020
Author: Jakub Lichman
Date: 2020-07-23T11:01:54+02:00
New Revision: e4dd964df0164651f1804612ad41582fb801607f
URL: https://github.com/llvm/llvm-project/commit/e4dd964df0164651f1804612ad41582fb801607f
DIFF: https://github.com/llvm/llvm-project/commit/e4dd964df0164651f1804612ad41582fb801607f.diff
LOG: [mlir] Loop bounds inference in linalg.generic op improved to support bounds for convolution
Loop bound inference is currently very limited as it supports only permutation maps, and thus
it is impossible to implement convolution with linalg.generic as it requires more advanced
loop bound inference. This commit solves it for the convolution case.
Depends On D83158
Differential Revision: https://reviews.llvm.org/D83191
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/test/Dialect/Linalg/loops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 769f4894a2b7..0dbf863c2f69 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -116,15 +116,16 @@ SmallVector<Value, 8> getViewSizes(OpBuilder &builder, ConcreteOp linalgOp) {
for (unsigned idx = 0; idx < attr.getInt(); idx++)
symbolsPos += ranks[idx];
- // Append or rewrite the end of the value list that corresponds to the
+ // Append the end of the value list that corresponds to the
// values mapping to symbols. Since inside concatenated map symbols are
// repeated we have to repeat the sizes as well.
- for (unsigned idx = 0, s = ranks.size(); idx < s; ++idx) {
- for (unsigned idx2 = 0; idx2 < numSymb; ++idx2) {
- Value viewSize = res[symbolsPos + idx2];
- res.push_back(viewSize);
- }
- }
+
+ // Reserve is mandatory to avoid a potential undefined behavior with
+ // pushing back to smallvector from itself.
+ res.reserve(res.size() + ranks.size() * numSymb);
+ for (unsigned idx = 0, s = ranks.size(); idx < s; ++idx)
+ for (unsigned idx2 = 0; idx2 < numSymb; ++idx2)
+ res.push_back(res[symbolsPos + idx2]);
}
return res;
}
@@ -134,7 +135,7 @@ SmallVector<Value, 8> getViewSizes(OpBuilder &builder, ConcreteOp linalgOp) {
/// `createAndFold` builder method. If `folder` is null, the regular `create`
/// method is called.
SmallVector<Value, 4> applyMapToValues(OpBuilder &b, Location loc,
- AffineMap map, ArrayRef<Value> values,
+ AffineMap map, ValueRange values,
OperationFolder *folder = nullptr);
/// Returns all the operands of `linalgOp` that are not views.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
index b5643e997da5..b256852ced26 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -76,6 +76,74 @@ emitLoopRanges(OpBuilder &b, Location loc, AffineMap map,
return res;
}
+/// Creates a number of ranges equal to the number of dimensions in the `map`.
+/// For now, the function supports only a limited number of expressions inside
+/// map results. It expects a non-inverted, concatenated map; the last values in
+/// viewSizes will be applied to the symbols in the map.
+static SmallVector<SubViewOp::Range, 4>
+emitLoopRangesWithSymbols(OpBuilder &b, Location loc, AffineMap map,
+ ValueRange viewSizes) {
+ unsigned numDims = map.getNumDims(), numRes = map.getNumResults();
+ unsigned numSym = map.getNumSymbols();
+ assert(viewSizes.size() == numRes + numSym &&
+ "viewSizes must contain sizes of all views and values for symbols");
+ SmallVector<SubViewOp::Range, 4> res(numDims);
+ for (unsigned idx = 0; idx < numRes; ++idx) {
+ auto result = map.getResult(idx);
+ if (auto d = result.dyn_cast<AffineDimExpr>()) {
+ if (res[d.getPosition()].offset)
+ continue;
+ res[d.getPosition()] = SubViewOp::Range{
+ std_constant_index(0), viewSizes[idx], std_constant_index(1)};
+ }
+
+ // If the access pattern is of form (m, n)[s] -> (m + n - s floordiv 2),
+ // then the bounds are:
+ // (s floordiv 2) <= m <= (size(m) + s floordiv 2 - s + 1).
+ // where size(n) is applied to the symbol s.
+ // This is done statically now.
+ if (auto binOp = result.dyn_cast<AffineBinaryOpExpr>()) {
+ auto lhs = binOp.getLHS().dyn_cast<AffineBinaryOpExpr>();
+ auto rhs = binOp.getRHS().dyn_cast<AffineBinaryOpExpr>();
+ if (!lhs || !rhs || binOp.getKind() != AffineExprKind::Add ||
+ lhs.getKind() != AffineExprKind::Add ||
+ rhs.getKind() != mlir::AffineExprKind::Mul)
+ continue;
+
+ auto m = lhs.getLHS().dyn_cast<AffineDimExpr>();
+ auto n = lhs.getRHS().dyn_cast<AffineDimExpr>();
+ auto fDiv = rhs.getLHS().dyn_cast<AffineBinaryOpExpr>();
+ auto minusOne = rhs.getRHS().dyn_cast<AffineConstantExpr>();
+ if (!m || !n || !fDiv || !minusOne ||
+ fDiv.getKind() != AffineExprKind::FloorDiv ||
+ fDiv.getLHS().getKind() != AffineExprKind::SymbolId ||
+ fDiv.getRHS().getKind() != AffineExprKind::Constant)
+ continue;
+
+ auto s = fDiv.getLHS().dyn_cast<AffineSymbolExpr>();
+ if (minusOne.getValue() != -1)
+ continue;
+
+ int mPos = m.getPosition();
+ AffineExpr one = getAffineConstantExpr(1, s.getContext());
+ AffineExpr sizeOfM = getAffineSymbolExpr(numSym, s.getContext());
+ // Construction of upper bound (size(m) + s floordiv 2 - s + 1).
+ AffineExpr upperOffsetExpr = sizeOfM + fDiv + one - s;
+ AffineMap fromMap = AffineMap::get(numDims, numSym + 1, fDiv);
+ AffineMap toMap = AffineMap::get(numDims, numSym + 1, upperOffsetExpr);
+ SmallVector<Value, 8> values(viewSizes.begin(),
+ viewSizes.begin() + numDims);
+ values.insert(values.end(), viewSizes.begin() + numRes, viewSizes.end());
+ values.push_back(viewSizes[mPos]);
+ // Construction of the lower bound (s floordiv 2).
+ Value from = applyMapToValues(b, loc, fromMap, values).front();
+ Value to = applyMapToValues(b, loc, toMap, values).front();
+ res[mPos] = SubViewOp::Range{from, to, std_constant_index(1)};
+ }
+ }
+ return res;
+}
+
template <typename IndexedValueType, typename OpType>
static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
ArrayRef<SmallVector<Value, 8>> indexing,
@@ -469,32 +537,24 @@ Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
SmallVector<Value, 8> sizes = getViewSizes(builder, linalgOp);
AffineMap map = concatAffineMaps(maps);
+ SmallVector<SubViewOp::Range, 4> loopRanges;
+
if (map.getNumSymbols()) {
- // Ignore symbols for now as they are not supported by inversePermutation.
- unsigned dims = map.getNumDims();
- SmallVector<AffineExpr, 8> zeros(
- map.getNumSymbols(), getAffineConstantExpr(0, map.getContext()));
- SmallVector<AffineExpr, 8> res;
- for (auto result : map.getResults())
- res.push_back(result.replaceDimsAndSymbols({}, zeros));
-
- map = AffineMap::get(dims, 0, res, map.getContext());
-
- // Cut off values that would have been applied to symbols
- sizes.resize(res.size());
- }
+ loopRanges = emitLoopRangesWithSymbols(scope.getBuilderRef(),
+ scope.getLocation(), map, sizes);
+ } else {
+ AffineMap invertedMap = inversePermutation(map);
+ if (!invertedMap)
+ return {};
+ if (invertedMap.isEmpty()) {
+ emitScalarImplementation<IndexedValueTy>({}, linalgOp);
+ return LinalgLoops();
+ }
- AffineMap invertedMap = inversePermutation(map);
- if (!invertedMap)
- return {};
- if (invertedMap.isEmpty()) {
- emitScalarImplementation<IndexedValueTy>({}, linalgOp);
- return LinalgLoops();
+ loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(),
+ invertedMap, sizes);
}
-
SmallVector<Value, 4> allIvs;
- auto loopRanges = emitLoopRanges(scope.getBuilderRef(), scope.getLocation(),
- invertedMap, sizes);
GenerateLoopNest<LoopTy>::doit(
loopRanges, linalgOp.iterator_types().getValue(), [&](ValueRange ivs) {
allIvs.append(ivs.begin(), ivs.end());
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index a9d5e2028c22..968f5b1d82c5 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -57,7 +57,7 @@ RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc,
AffineMap map,
- ArrayRef<Value> operandsRef,
+ ValueRange operandsRef,
OperationFolder *folder) {
SmallVector<Value, 4> operands(operandsRef.begin(), operandsRef.end());
fullyComposeAffineMapAndOperands(&map, &operands);
@@ -68,16 +68,16 @@ static Value emitOrFoldComposedAffineApply(OpBuilder &b, Location loc,
SmallVector<Value, 4> mlir::linalg::applyMapToValues(OpBuilder &b, Location loc,
AffineMap map,
- ArrayRef<Value> values,
+ ValueRange values,
OperationFolder *folder) {
SmallVector<Value, 4> res;
res.reserve(map.getNumResults());
- unsigned numDims = map.getNumDims();
+ unsigned numDims = map.getNumDims(), numSym = map.getNumSymbols();
// For each `expr` in `map`, applies the `expr` to the values extracted from
// ranges. If the resulting application can be folded into a Value, the
// folding occurs eagerly. Otherwise, an affine.apply operation is emitted.
for (auto expr : map.getResults()) {
- AffineMap map = AffineMap::get(numDims, 0, expr);
+ AffineMap map = AffineMap::get(numDims, numSym, expr);
res.push_back(emitOrFoldComposedAffineApply(b, loc, map, values, folder));
}
return res;
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index b3f6160b17ed..30bb90bdd43a 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -14,6 +14,8 @@
// CHECKLOOP-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
// CHECKLOOP-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
// CHECKLOOP-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)>
+// CHECKLOOP-DAG: #[[$convLowerBound:.*]] = affine_map<()[s0] -> (s0 floordiv 2)>
+// CHECKLOOP-DAG: #[[$convUpperBound:.*]] = affine_map<()[s0, s1] -> (s1 + s0 floordiv 2 - s0 + 1)>
// CHECKLOOP-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)>
// CHECKPARALLEL-DAG: #[[$strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
@@ -26,6 +28,8 @@
// CHECKPARALLEL-DAG: #[[$stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
// CHECKPARALLEL-DAG: #[[$stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
// CHECKPARALLEL-DAG: #[[$stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)>
+// CHECKPARALLEL-DAG: #[[$convLowerBound:.*]] = affine_map<()[s0] -> (s0 floordiv 2)>
+// CHECKPARALLEL-DAG: #[[$convUpperBound:.*]] = affine_map<()[s0, s1] -> (s1 + s0 floordiv 2 - s0 + 1)>
// CHECKPARALLEL-DAG: #[[$convMap:.*]] = affine_map<(d0, d1)[s0] -> (d0 + d1 - s0 floordiv 2)>
@@ -947,10 +951,12 @@ func @conv1d(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>
// CHECKLOOP-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
// CHECKLOOP: %[[c0:.*]] = constant 0 : index
-// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
-// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref<?xf32>
-// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} {
+// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?xf32>
+// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
+// CHECKLOOP: %[[lowerBound:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim1]]]
+// CHECKLOOP: %[[upperBound:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim1]], %[[dim0]]]
+// CHECKLOOP: scf.for %[[b:.*]] = %[[lowerBound]] to %[[upperBound]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} {
// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
// CHECKLOOP: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]]
// CHECKLOOP: %[[va:.*]] = load %[[arg0]][%[[aff]]] : memref<?xf32>
@@ -965,9 +971,11 @@ func @conv1d(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>
// CHECKPARALLEL-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<?xf32>
// CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
// CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
-// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
-// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref<?xf32>
-// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%{{.*}}, %{{.*}}) to (%[[dim1]], %[[dim0]]) step ({{.*}}) {
+// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?xf32>
+// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
+// CHECKPARALLEL: %[[lowerBound:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim1]]]
+// CHECKPARALLEL: %[[upperBound:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim1]], %[[dim0]]]
+// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[lowerBound]], %{{.*}}) to (%[[upperBound]], %[[dim1]]) step ({{.*}}) {
// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref<?xf32>
// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim2]]]
// CHECKPARALLEL: %[[va:.*]] = load %[[arg0]][%[[aff]]] : memref<?xf32>
@@ -1012,14 +1020,18 @@ func @conv2d(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out : memref<?x
// CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
// CHECKLOOP: %[[c0:.*]] = constant 0 : index
// CHECKLOOP: %[[c1:.*]] = constant 1 : index
-// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?xf32>
-// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?xf32>
-// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} {
+// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?xf32>
+// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?xf32>
+// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
+// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
+// CHECKLOOP: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim2]]]
+// CHECKLOOP: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim2]], %[[dim0]]]
+// CHECKLOOP: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim3]]]
+// CHECKLOOP: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim3]], %[[dim1]]]
+// CHECKLOOP: scf.for %[[i0:.*]] = %[[lowerBound1]] to %[[upperBound1]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i1:.*]] = %[[lowerBound2]] to %[[upperBound2]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} {
// CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
// CHECKLOOP: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
// CHECKLOOP: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]]
@@ -1037,11 +1049,15 @@ func @conv2d(%in : memref<?x?xf32>, %filter : memref<?x?xf32>, %out : memref<?x
// CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
// CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
// CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
-// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?xf32>
-// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?xf32>
-// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step ({{.*}}) {
+// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
+// CHECKPARALLEL: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim2]]]
+// CHECKPARALLEL: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim2]], %[[dim0]]]
+// CHECKPARALLEL: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim3]]]
+// CHECKPARALLEL: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim3]], %[[dim1]]]
+// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]]) = (%[[lowerBound1]], %[[lowerBound2]], %{{.*}}, %{{.*}}) to (%[[upperBound1]], %[[upperBound2]], %[[dim2]], %[[dim3]]) step ({{.*}}) {
// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?xf32>
// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?xf32>
// CHECKPARALLEL: %[[aff1:.*]] = affine.apply #[[$convMap]](%{{.*}}, %{{.*}})[%[[dim4]]]
@@ -1089,18 +1105,24 @@ func @conv3d(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %out : memre
// CHECKLOOP: %[[c0:.*]] = constant 0 : index
// CHECKLOOP: %[[c1:.*]] = constant 1 : index
// CHECKLOOP: %[[c2:.*]] = constant 2 : index
-// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
-// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
-// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} {
+// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[dim5:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
+// CHECKLOOP: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim3]]]
+// CHECKLOOP: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim3]], %[[dim0]]]
+// CHECKLOOP: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim4]]]
+// CHECKLOOP: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim4]], %[[dim1]]]
+// CHECKLOOP: %[[lowerBound3:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim5]]]
+// CHECKLOOP: %[[upperBound3:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim5]], %[[dim2]]]
+// CHECKLOOP: scf.for %[[i0:.*]] = %[[lowerBound1]] to %[[upperBound1]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i1:.*]] = %[[lowerBound2]] to %[[upperBound2]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i2:.*]] = %[[lowerBound3]] to %[[upperBound3]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} {
// CHECKLOOP: %[[dim6:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
// CHECKLOOP: %[[dim7:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
// CHECKLOOP: %[[dim8:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
@@ -1121,13 +1143,19 @@ func @conv3d(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %out : memre
// CHECKPARALLEL: %[[c0:.*]] = constant 0 : index
// CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
// CHECKPARALLEL: %[[c2:.*]] = constant 2 : index
-// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
-// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
-// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
-// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?x?xf32>
-// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?x?xf32>
-// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref<?x?x?xf32>
-// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step ({{.*}}) {
+// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
+// CHECKPARALLEL: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim3]]]
+// CHECKPARALLEL: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim3]], %[[dim0]]]
+// CHECKPARALLEL: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim4]]]
+// CHECKPARALLEL: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim4]], %[[dim1]]]
+// CHECKPARALLEL: %[[lowerBound3:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim5]]]
+// CHECKPARALLEL: %[[upperBound3:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim5]], %[[dim2]]]
+// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]]) = (%[[lowerBound1]], %[[lowerBound2]], %[[lowerBound3]], %{{.*}}, %{{.*}}, %{{.*}}) to (%[[upperBound1]], %[[upperBound2]], %[[upperBound3]], %[[dim3]], %[[dim4]], %[[dim5]]) step ({{.*}}) {
// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?xf32>
// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?xf32>
// CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?xf32>
@@ -1178,22 +1206,30 @@ func @conv4d(%in : memref<?x?x?x?xf32>, %filter : memref<?x?x?x?xf32>, %out : m
// CHECKLOOP: %[[c1:.*]] = constant 1 : index
// CHECKLOOP: %[[c2:.*]] = constant 2 : index
// CHECKLOOP: %[[c3:.*]] = constant 3 : index
-// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECKLOOP: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref<?x?x?x?xf32>
-// CHECKLOOP: scf.for %[[i0:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i1:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i2:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i3:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim0]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim1]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim2]] step %{{.*}} {
-// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim3]] step %{{.*}} {
+// CHECKLOOP: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim6:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[dim7:.*]] = dim %[[arg1]], %[[c3]] : memref<?x?x?x?xf32>
+// CHECKLOOP: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim4]]]
+// CHECKLOOP: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim4]], %[[dim0]]]
+// CHECKLOOP: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim5]]]
+// CHECKLOOP: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim5]], %[[dim1]]]
+// CHECKLOOP: %[[lowerBound3:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim6]]]
+// CHECKLOOP: %[[upperBound3:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim6]], %[[dim2]]]
+// CHECKLOOP: %[[lowerBound4:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim7]]]
+// CHECKLOOP: %[[upperBound4:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim7]], %[[dim3]]]
+// CHECKLOOP: scf.for %[[i0:.*]] = %[[lowerBound1]] to %[[upperBound1]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i1:.*]] = %[[lowerBound2]] to %[[upperBound2]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i2:.*]] = %[[lowerBound3]] to %[[upperBound3]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i3:.*]] = %[[lowerBound4]] to %[[upperBound4]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i4:.*]] = %{{.*}} to %[[dim4]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i5:.*]] = %{{.*}} to %[[dim5]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i6:.*]] = %{{.*}} to %[[dim6]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[i7:.*]] = %{{.*}} to %[[dim7]] step %{{.*}} {
// CHECKLOOP: %[[dim8:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
// CHECKLOOP: %[[dim9:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
// CHECKLOOP: %[[dim10:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
@@ -1217,15 +1253,23 @@ func @conv4d(%in : memref<?x?x?x?xf32>, %filter : memref<?x?x?x?xf32>, %out : m
// CHECKPARALLEL: %[[c1:.*]] = constant 1 : index
// CHECKPARALLEL: %[[c2:.*]] = constant 2 : index
// CHECKPARALLEL: %[[c3:.*]] = constant 3 : index
-// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg1]], %[[c3]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c0]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c1]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg2]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg2]], %[[c3]] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]], %[[dim0]], %[[dim1]], %[[dim2]], %[[dim3]]) step ({{.*}}) {
+// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg0]], %[[c0]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg0]], %[[c1]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg0]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg0]], %[[c3]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim6:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[dim7:.*]] = dim %[[arg1]], %[[c3]] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %[[lowerBound1:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim4]]]
+// CHECKPARALLEL: %[[upperBound1:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim4]], %[[dim0]]]
+// CHECKPARALLEL: %[[lowerBound2:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim5]]]
+// CHECKPARALLEL: %[[upperBound2:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim5]], %[[dim1]]]
+// CHECKPARALLEL: %[[lowerBound3:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim6]]]
+// CHECKPARALLEL: %[[upperBound3:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim6]], %[[dim2]]]
+// CHECKPARALLEL: %[[lowerBound4:.*]] = affine.apply #[[$convLowerBound]]()[%[[dim7]]]
+// CHECKPARALLEL: %[[upperBound4:.*]] = affine.apply #[[$convUpperBound]]()[%[[dim7]], %[[dim3]]]
+// CHECKPARALLEL: scf.parallel (%[[i0:.*]], %[[i1:.*]], %[[i2:.*]], %[[i3:.*]], %[[i4:.*]], %[[i5:.*]], %[[i6:.*]], %[[i7:.*]]) = (%[[lowerBound1]], %[[lowerBound2]], %[[lowerBound3]], %[[lowerBound4]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[upperBound1]], %[[upperBound2]], %[[upperBound3]], %[[upperBound4]], %[[dim4]], %[[dim5]], %[[dim6]], %[[dim7]]) step ({{.*}}) {
// CHECKPARALLEL: %[[dim8:.*]] = dim %[[arg1]], %[[c0]] : memref<?x?x?x?xf32>
// CHECKPARALLEL: %[[dim9:.*]] = dim %[[arg1]], %[[c1]] : memref<?x?x?x?xf32>
// CHECKPARALLEL: %[[dim10:.*]] = dim %[[arg1]], %[[c2]] : memref<?x?x?x?xf32>
More information about the Mlir-commits
mailing list