[Mlir-commits] [mlir] 54db8cc - [mlir][vector] Remove ExtractMap/InsertMap operations
Thomas Raoux
llvmlistbot at llvm.org
Fri Sep 16 10:41:39 PDT 2022
Author: Thomas Raoux
Date: 2022-09-16T17:41:26Z
New Revision: 54db8cc7b1054fa3ed64953652ddc941e12aee63
URL: https://github.com/llvm/llvm-project/commit/54db8cc7b1054fa3ed64953652ddc941e12aee63
DIFF: https://github.com/llvm/llvm-project/commit/54db8cc7b1054fa3ed64953652ddc941e12aee63.diff
LOG: [mlir][vector] Remove ExtractMap/InsertMap operations
As discussed on discourse: https://discourse.llvm.org/t/vector-vector-distribution-large-vector-to-small-vector/1983/22
removing insert_map/extract_map op as vector distribution now uses
warp_execute_on_lane_0 op.
Differential Revision: https://reviews.llvm.org/D134000
Added:
mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
Modified:
mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
mlir/lib/Dialect/Vector/IR/VectorOps.cpp
mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
mlir/test/Dialect/Vector/invalid.mlir
mlir/test/Dialect/Vector/ops.mlir
mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
Removed:
mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp
mlir/test/Dialect/Vector/vector-distribution.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
index 61a68449f83c7..6f1f361005904 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h
@@ -119,11 +119,6 @@ void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns,
bool force32BitVectorIndices,
PatternBenefit benefit = 1);
-/// Collect a set of patterns to propagate insert_map/extract_map in the ssa
-/// chain.
-void populatePropagateVectorDistributionPatterns(RewritePatternSet &patterns,
- PatternBenefit benefit = 1);
-
/// Collects patterns to progressively lower vector.broadcast ops on high-D
/// vectors to low-D vector ops.
void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns,
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 07f1b21a24b88..688056aeabf2a 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -599,75 +599,6 @@ def Vector_ExtractOp :
let hasVerifier = 1;
}
-def Vector_ExtractMapOp :
- Vector_Op<"extract_map", [NoSideEffect]>,
- Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
- Results<(outs AnyVector)> {
- let summary = "vector extract map operation";
- let description = [{
- Takes an N-D vector and extracts a sub-part of the vector starting at id
- along each dimension.
-
- The dimension associated to each element of `ids` used to extract are
- implicitly deduced from the destination type. For each dimension the
- multiplicity is the destination dimension size divided by the source
- dimension size, each dimension with a multiplicity greater than 1 is
- associated to the next id, following ids order.
- For example if the source type is `vector<64x4x32xf32>` and the destination
- type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
- id to dimension 2.
-
- Similarly to vector.tuple_get, this operation is used for progressive
- lowering and should be folded away before converting to LLVM.
-
- It is different than `vector.extract_slice` and
- `vector.extract_strided_slice` as it takes a Value as index instead of an
- attribute. Also in the future it is meant to support extracting along any
- dimensions and not only the most major ones.
-
- For instance:
- ```
- // dynamic computation producing the value 0 of index type
- %idx0 = ... : index
- // dynamic computation producing the value 1 of index type
- %idx1 = ... : index
- %0 = arith.constant dense<0, 1, 2, 3>: vector<4xi32>
- // extracts values [0, 1]
- %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
- // extracts values [1, 2]
- %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
- ```
-
- Example:
-
- ```mlir
- %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
- %ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
- to vector<4x4x2xf32>
- ```
- }];
- let builders = [
- OpBuilder<(ins "Value":$vector, "ValueRange":$ids,
- "ArrayRef<int64_t>":$multiplicity,
- "AffineMap":$map)>];
- let extraClassDeclaration = [{
- VectorType getSourceVectorType() {
- return getVector().getType().cast<VectorType>();
- }
- VectorType getResultType() {
- return getResult().getType().cast<VectorType>();
- }
- void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
- AffineMap map();
- }];
- let assemblyFormat = [{
- $vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
- }];
-
- let hasFolder = 1;
- let hasVerifier = 1;
-}
-
def Vector_FMAOp :
Op<Vector_Dialect, "fma", [
NoSideEffect, AllTypesMatch<["lhs", "rhs", "acc", "result"]>,
@@ -790,72 +721,6 @@ def Vector_InsertOp :
let hasVerifier = 1;
}
-def Vector_InsertMapOp :
- Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
- Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
- Results<(outs AnyVector:$result)> {
- let summary = "vector insert map operation";
- let description = [{
- Inserts a N-D vector and within a larger vector starting at id. The new
- vector created will have the same size as the destination operand vector.
-
- The dimension associated to each element of `ids` used to insert is
- implicitly deduced from the source type (see `ExtractMapOp` for details).
- For example if source type is `vector<4x4x2xf32>` and the destination type
- is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id
- to dimension 2.
-
- Similarly to vector.tuple_get, this operation is used for progressive
- lowering and should be folded away before converting to LLVM.
-
- It is different than `vector.insert` and `vector.insert_strided_slice` as it
- takes a Value as index instead of an attribute. Also in the future it is
- meant to support inserting along any dimensions and not only the most major
- ones.
-
- This operations is meant to be used in combination with vector.extract_map.
-
- For instance:
- ```
- // dynamic computation producing the value 0 of index type
- %idx0 = ... : index
- // dynamic computation producing the value 1 of index type
- %idx1 = ... : index /
- %0 = arith.constant dense<0, 1, 2, 3>: vector<4xi32>
- // extracts values [0, 1]
- %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
- // extracts values [1, 2]
- %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
- // insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x]
- %3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
- // insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x]
- %4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
- ```
- Example:
-
- ```mlir
- %v = vector.insert_map %ev %v[%id] : vector<1xf32> into vector<32xf32>
- %v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
- into vector<64x4x32xf32>
- ```
- }];
- let extraClassDeclaration = [{
- VectorType getSourceVectorType() {
- return getVector().getType().cast<VectorType>();
- }
- VectorType getResultType() {
- return getResult().getType().cast<VectorType>();
- }
- // Return a map indicating the dimension mapping to the given ids.
- AffineMap map();
- }];
- let assemblyFormat = [{
- $vector `,` $dest `[` $ids `]` attr-dict
- `:` type($vector) `into` type($result)
- }];
- let hasVerifier = 1;
-}
-
def Vector_InsertStridedSliceOp :
Vector_Op<"insert_strided_slice", [NoSideEffect,
PredOpTrait<"operand #0 and result have same element type",
diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
index 3bb9a22caf69e..947911f9a3841 100644
--- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h
@@ -65,28 +65,6 @@ LogicalResult splitFullAndPartialTransfer(
VectorTransformsOptions options = VectorTransformsOptions(),
scf::IfOp *ifOp = nullptr);
-struct DistributeOps {
- ExtractMapOp extract;
- InsertMapOp insert;
-};
-
-/// Distribute a N-D vector pointwise operation over a range of given ids taking
-/// *all* values in [0 .. multiplicity - 1] (e.g. loop induction variable or
-/// SPMD id). This transformation only inserts
-/// vector.extract_map/vector.insert_map. It is meant to be used with
-/// canonicalizations pattern to propagate and fold the vector
-/// insert_map/extract_map operations.
-/// Transforms:
-// %v = arith.addf %a, %b : vector<32xf32>
-/// to:
-/// %v = arith.addf %a, %b : vector<32xf32>
-/// %ev = vector.extract_map %v, %id, 32 : vector<32xf32> into vector<1xf32>
-/// %nv = vector.insert_map %ev, %id, 32 : vector<1xf32> into vector<32xf32>
-Optional<DistributeOps>
-distributPointwiseVectorOp(OpBuilder &builder, Operation *op,
- ArrayRef<Value> id, ArrayRef<int64_t> multiplicity,
- const AffineMap &map);
-
/// Implements transfer op write to read forwarding and dead transfer write
/// optimizations.
void transferOpflowOpt(Operation *rootOp);
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 65cb37737f329..fad69e65e62c5 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -1630,81 +1630,6 @@ static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
results.push_back(attr.cast<IntegerAttr>().getInt());
}
-//===----------------------------------------------------------------------===//
-// ExtractMapOp
-//===----------------------------------------------------------------------===//
-
-void ExtractMapOp::build(OpBuilder &builder, OperationState &result,
- Value vector, ValueRange ids,
- ArrayRef<int64_t> multiplicity,
- AffineMap permutationMap) {
- assert(ids.size() == multiplicity.size() &&
- ids.size() == permutationMap.getNumResults());
- assert(permutationMap.isProjectedPermutation());
- VectorType type = vector.getType().cast<VectorType>();
- SmallVector<int64_t, 4> newShape(type.getShape().begin(),
- type.getShape().end());
- for (unsigned i = 0, e = permutationMap.getNumResults(); i < e; i++) {
- AffineExpr expr = permutationMap.getResult(i);
- auto dim = expr.cast<AffineDimExpr>();
- newShape[dim.getPosition()] = newShape[dim.getPosition()] / multiplicity[i];
- }
- VectorType resultType = VectorType::get(newShape, type.getElementType());
- ExtractMapOp::build(builder, result, resultType, vector, ids);
-}
-
-LogicalResult ExtractMapOp::verify() {
- if (getSourceVectorType().getRank() != getResultType().getRank())
- return emitOpError("expected source and destination vectors of same rank");
- unsigned numId = 0;
- for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; ++i) {
- if (getSourceVectorType().getDimSize(i) % getResultType().getDimSize(i) !=
- 0)
- return emitOpError("source vector dimensions must be a multiple of "
- "destination vector dimensions");
- if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
- numId++;
- }
- if (numId != getIds().size())
- return emitOpError("expected number of ids must match the number of "
- "dimensions distributed");
- return success();
-}
-
-OpFoldResult ExtractMapOp::fold(ArrayRef<Attribute> operands) {
- auto insert = getVector().getDefiningOp<vector::InsertMapOp>();
- if (insert == nullptr || getType() != insert.getVector().getType() ||
- getIds() != insert.getIds())
- return {};
- return insert.getVector();
-}
-
-void ExtractMapOp::getMultiplicity(SmallVectorImpl<int64_t> &multiplicity) {
- assert(multiplicity.empty());
- for (unsigned i = 0, e = getSourceVectorType().getRank(); i < e; i++) {
- if (getSourceVectorType().getDimSize(i) != getResultType().getDimSize(i))
- multiplicity.push_back(getSourceVectorType().getDimSize(i) /
- getResultType().getDimSize(i));
- }
-}
-
-template <typename MapOp>
-AffineMap calculateImplicitMap(MapOp op) {
- SmallVector<AffineExpr, 4> perm;
- // Check which dimension have a multiplicity greater than 1 and associated
- // them to the IDs in order.
- for (unsigned i = 0, e = op.getSourceVectorType().getRank(); i < e; i++) {
- if (op.getSourceVectorType().getDimSize(i) !=
- op.getResultType().getDimSize(i))
- perm.push_back(getAffineDimExpr(i, op.getContext()));
- }
- auto map = AffineMap::get(op.getSourceVectorType().getRank(), 0, perm,
- op.getContext());
- return map;
-}
-
-AffineMap ExtractMapOp::map() { return calculateImplicitMap(*this); }
-
//===----------------------------------------------------------------------===//
// FmaOp
//===----------------------------------------------------------------------===//
@@ -2133,30 +2058,6 @@ OpFoldResult vector::InsertOp::fold(ArrayRef<Attribute> operands) {
return {};
}
-//===----------------------------------------------------------------------===//
-// InsertMapOp
-//===----------------------------------------------------------------------===//
-
-LogicalResult InsertMapOp::verify() {
- if (getSourceVectorType().getRank() != getResultType().getRank())
- return emitOpError("expected source and destination vectors of same rank");
- unsigned numId = 0;
- for (unsigned i = 0, e = getResultType().getRank(); i < e; i++) {
- if (getResultType().getDimSize(i) % getSourceVectorType().getDimSize(i) !=
- 0)
- return emitOpError(
- "destination vector size must be a multiple of source vector size");
- if (getResultType().getDimSize(i) != getSourceVectorType().getDimSize(i))
- numId++;
- }
- if (numId != getIds().size())
- return emitOpError("expected number of ids must match the number of "
- "dimensions distributed");
- return success();
-}
-
-AffineMap InsertMapOp::map() { return calculateImplicitMap(*this); }
-
//===----------------------------------------------------------------------===//
// InsertStridedSliceOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
index 01a79d49b1679..de9ec991df0e9 100644
--- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt
@@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRVectorTransforms
VectorTransferSplitRewritePatterns.cpp
VectorTransferPermutationMapRewritePatterns.cpp
VectorTransforms.cpp
- VectorUnrollDistribute.cpp
+ VectorUnroll.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Vector/Transforms
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 2bd6756fd77aa..fded10629b307 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1998,37 +1998,6 @@ ContractionOpLowering::lowerReduction(vector::ContractionOp op,
} // namespace mlir
-Optional<mlir::vector::DistributeOps> mlir::vector::distributPointwiseVectorOp(
- OpBuilder &builder, Operation *op, ArrayRef<Value> ids,
- ArrayRef<int64_t> multiplicity, const AffineMap &map) {
- OpBuilder::InsertionGuard guard(builder);
- builder.setInsertionPointAfter(op);
- Location loc = op->getLoc();
- if (op->getNumResults() != 1)
- return {};
- Value result = op->getResult(0);
- VectorType type = op->getResult(0).getType().dyn_cast<VectorType>();
- if (!type || map.getNumResults() != multiplicity.size())
- return {};
- // For each dimension being distributed check that the size is a multiple of
- // the multiplicity. To handle more sizes we would need to support masking.
- unsigned multiplictyCount = 0;
- for (auto exp : map.getResults()) {
- auto affinExp = exp.dyn_cast<AffineDimExpr>();
- if (!affinExp || affinExp.getPosition() >= type.getRank() ||
- type.getDimSize(affinExp.getPosition()) %
- multiplicity[multiplictyCount++] !=
- 0)
- return {};
- }
- DistributeOps ops;
- ops.extract =
- builder.create<vector::ExtractMapOp>(loc, result, ids, multiplicity, map);
- ops.insert =
- builder.create<vector::InsertMapOp>(loc, ops.extract, result, ids);
- return ops;
-}
-
/// Progressive lowering of transfer_read. This pattern supports lowering of
/// `vector.transfer_read` to a combination of `vector.load` and
/// `vector.broadcast` if all of the following hold:
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
similarity index 76%
rename from mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp
rename to mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
index dc314aa2141f4..8c5d5144304e5 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
@@ -538,202 +538,6 @@ struct UnrollElementwisePattern : public RewritePattern {
vector::UnrollVectorOptions options;
};
-/// Canonicalize an extract_map using the result of a pointwise operation.
-/// Transforms:
-/// %v = arith.addf %a, %b : vector32xf32>
-/// %dv = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
-/// to:
-/// %da = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
-/// %db = vector.extract_map %a[%id] : vector<32xf32> to vector<1xf32>
-/// %dv = arith.addf %da, %db : vector<1xf32>
-struct PointwiseExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
- using OpRewritePattern::OpRewritePattern;
-
- LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
- PatternRewriter &rewriter) const override {
- Operation *definedOp = extract.getVector().getDefiningOp();
- if (!definedOp || !OpTrait::hasElementwiseMappableTraits(definedOp) ||
- definedOp->getNumResults() != 1)
- return failure();
- Location loc = extract.getLoc();
- SmallVector<Value, 4> extractOperands;
- for (OpOperand &operand : definedOp->getOpOperands()) {
- auto vecType = operand.get().getType().template dyn_cast<VectorType>();
- if (!vecType) {
- extractOperands.push_back(operand.get());
- continue;
- }
- extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
- loc,
- VectorType::get(extract.getResultType().getShape(),
- vecType.getElementType()),
- operand.get(), extract.getIds()));
- }
- Operation *newOp = cloneOpWithOperandsAndTypes(
- rewriter, loc, definedOp, extractOperands, extract.getResultType());
- rewriter.replaceOp(extract, newOp->getResult(0));
- return success();
- }
-};
-
-/// Canonicalize an extract_map using the result of a contract operation.
-/// This propagate the extract_map to operands.
-struct ContractExtractPattern : public OpRewritePattern<vector::ExtractMapOp> {
- using OpRewritePattern::OpRewritePattern;
-
- LogicalResult matchAndRewrite(vector::ExtractMapOp extract,
- PatternRewriter &rewriter) const override {
- Operation *definedOp = extract.getVector().getDefiningOp();
- auto contract = dyn_cast_or_null<vector::ContractionOp>(definedOp);
- if (!contract)
- return failure();
- Location loc = contract.getLoc();
- unsigned accIndex = vector::ContractionOp::getAccOperandIndex();
- AffineMap affineMap = contract.getIndexingMapsArray()[accIndex];
- // Create a map of the dimensions distributed based on the acc affine map.
- // Only parallel dimensions are being distributed, reduction dimensions are
- // untouched.
- DenseMap<int64_t, int64_t> map;
- for (unsigned i : llvm::seq(unsigned(0), affineMap.getNumResults()))
- map[affineMap.getDimPosition(i)] = extract.getResultType().getDimSize(i);
- SmallVector<Value, 4> extractOperands;
- for (const auto &it : llvm::enumerate(contract.getIndexingMapsArray())) {
- // For each operands calculate the new vector type after distribution.
- Value operand = contract->getOperand(it.index());
- auto vecType = operand.getType().cast<VectorType>();
- SmallVector<int64_t> operandShape(vecType.getShape().begin(),
- vecType.getShape().end());
- for (unsigned i : llvm::seq(unsigned(0), it.value().getNumResults())) {
- unsigned dim = it.value().getDimPosition(i);
- auto distributedDim = map.find(dim);
- // If the dimension is not in the map it means it is a reduction and
- // doesn't get distributed.
- if (distributedDim == map.end())
- continue;
- operandShape[i] = distributedDim->second;
- }
- VectorType newVecType =
- VectorType::get(operandShape, vecType.getElementType());
- extractOperands.push_back(rewriter.create<vector::ExtractMapOp>(
- loc, newVecType, operand, extract.getIds()));
- }
- Operation *newOp =
- cloneOpWithOperandsAndTypes(rewriter, loc, definedOp, extractOperands,
- extract.getResult().getType());
- rewriter.replaceOp(extract, newOp->getResult(0));
- return success();
- }
-};
-
-/// Converts TransferRead op used by ExtractMap op into a smaller dimension
-/// TransferRead.
-/// Example:
-/// ```
-/// %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0:
-/// memref<64x64x64xf32>, vector<64x4x32xf32>
-/// %e = vector.extract_map %a[%id] : vector<64x4x32xf32> to vector<2x4x1xf32>
-/// ```
-/// to:
-/// ```
-/// %id1 = affine.apply affine_map<()[s0] -> (s0 * 2)> (%id)
-/// %e = vector.transfer_read %A[%id1, %c0, %id1], %cf0 :
-/// memref<64x64x64xf32>, vector<2x4x1xf32>
-/// ```
-struct TransferReadExtractPattern
- : public OpRewritePattern<vector::TransferReadOp> {
- using OpRewritePattern::OpRewritePattern;
-
- LogicalResult matchAndRewrite(vector::TransferReadOp read,
- PatternRewriter &rewriter) const override {
- // TODO: support 0-d corner case.
- if (read.getTransferRank() == 0)
- return failure();
-
- if (!read.getResult().hasOneUse())
- return failure();
- auto extract =
- dyn_cast<vector::ExtractMapOp>(*read.getResult().getUsers().begin());
- if (!extract)
- return failure();
- if (read.getMask())
- return failure();
-
- SmallVector<Value, 4> indices(read.getIndices().begin(),
- read.getIndices().end());
- AffineMap indexMap = extract.map().compose(read.getPermutationMap());
- unsigned idCount = 0;
- ImplicitLocOpBuilder lb(read.getLoc(), rewriter);
- for (auto it :
- llvm::zip(indexMap.getResults(), extract.map().getResults())) {
- AffineExpr d0, d1;
- bindDims(read.getContext(), d0, d1);
- auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
- if (!indexExpr)
- continue;
- unsigned indexPos = indexExpr.getPosition();
- unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
- auto scale = getAffineConstantExpr(
- extract.getResultType().getDimSize(vectorPos), read.getContext());
- indices[indexPos] = makeComposedAffineApply(
- rewriter, read.getLoc(), d0 + scale * d1,
- {indices[indexPos], extract.getIds()[idCount++]});
- }
- Value newRead = lb.create<vector::TransferReadOp>(
- extract.getType(), read.getSource(), indices,
- read.getPermutationMapAttr(), read.getPadding(), read.getMask(),
- read.getInBoundsAttr());
- Value dest = lb.create<arith::ConstantOp>(
- read.getType(), rewriter.getZeroAttr(read.getType()));
- newRead = lb.create<vector::InsertMapOp>(newRead, dest, extract.getIds());
- rewriter.replaceOp(read, newRead);
- return success();
- }
-};
-
-struct TransferWriteInsertPattern
- : public OpRewritePattern<vector::TransferWriteOp> {
- using OpRewritePattern::OpRewritePattern;
-
- LogicalResult matchAndRewrite(vector::TransferWriteOp write,
- PatternRewriter &rewriter) const override {
- // TODO: support 0-d corner case.
- if (write.getTransferRank() == 0)
- return failure();
-
- auto insert = write.getVector().getDefiningOp<vector::InsertMapOp>();
- if (!insert)
- return failure();
- if (write.getMask())
- return failure();
- SmallVector<Value, 4> indices(write.getIndices().begin(),
- write.getIndices().end());
- AffineMap indexMap = insert.map().compose(write.getPermutationMap());
- unsigned idCount = 0;
- Location loc = write.getLoc();
- for (auto it :
- llvm::zip(indexMap.getResults(), insert.map().getResults())) {
- AffineExpr d0, d1;
- bindDims(write.getContext(), d0, d1);
- auto indexExpr = std::get<0>(it).dyn_cast<AffineDimExpr>();
- if (!indexExpr)
- continue;
- unsigned indexPos = indexExpr.getPosition();
- unsigned vectorPos = std::get<1>(it).cast<AffineDimExpr>().getPosition();
- auto scale = getAffineConstantExpr(
- insert.getSourceVectorType().getDimSize(vectorPos),
- write.getContext());
- indices[indexPos] = makeComposedAffineApply(
- rewriter, loc, d0 + scale * d1,
- {indices[indexPos], insert.getIds()[idCount++]});
- }
- rewriter.create<vector::TransferWriteOp>(
- loc, insert.getVector(), write.getSource(), indices,
- write.getPermutationMapAttr(), write.getInBoundsAttr());
- rewriter.eraseOp(write);
- return success();
- }
-};
-
struct UnrollReductionPattern : public OpRewritePattern<vector::ReductionOp> {
UnrollReductionPattern(MLIRContext *context,
const vector::UnrollVectorOptions &options,
@@ -841,10 +645,3 @@ void mlir::vector::populateVectorUnrollPatterns(
UnrollReductionPattern, UnrollMultiReductionPattern,
UnrollTranposePattern>(patterns.getContext(), options, benefit);
}
-
-void mlir::vector::populatePropagateVectorDistributionPatterns(
- RewritePatternSet &patterns, PatternBenefit benefit) {
- patterns.add<PointwiseExtractPattern, ContractExtractPattern,
- TransferReadExtractPattern, TransferWriteInsertPattern>(
- patterns.getContext(), benefit);
-}
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index 59474bcd4b32f..5fbcefcf3414f 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1470,48 +1470,6 @@ func.func @compress_memref_mismatch(%base: memref<?x?xf32>, %mask: vector<16xi1>
// -----
-func.func @extract_map_rank(%v: vector<32xf32>, %id : index) {
- // expected-error@+1 {{'vector.extract_map' op expected source and destination vectors of same rank}}
- %0 = vector.extract_map %v[%id] : vector<32xf32> to vector<2x1xf32>
-}
-
-// -----
-
-func.func @extract_map_size(%v: vector<63xf32>, %id : index) {
- // expected-error@+1 {{'vector.extract_map' op source vector dimensions must be a multiple of destination vector dimensions}}
- %0 = vector.extract_map %v[%id] : vector<63xf32> to vector<2xf32>
-}
-
-// -----
-
-func.func @extract_map_id(%v: vector<2x32xf32>, %id : index) {
- // expected-error@+1 {{'vector.extract_map' op expected number of ids must match the number of dimensions distributed}}
- %0 = vector.extract_map %v[%id] : vector<2x32xf32> to vector<1x1xf32>
-}
-
-// -----
-
-func.func @insert_map_rank(%v: vector<2x1xf32>, %v1: vector<32xf32>, %id : index) {
- // expected-error@+1 {{'vector.insert_map' op expected source and destination vectors of same rank}}
- %0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<32xf32>
-}
-
-// -----
-
-func.func @insert_map_size(%v: vector<3xf32>, %v1: vector<64xf32>, %id : index) {
- // expected-error@+1 {{'vector.insert_map' op destination vector size must be a multiple of source vector size}}
- %0 = vector.insert_map %v, %v1[%id] : vector<3xf32> into vector<64xf32>
-}
-
-// -----
-
-func.func @insert_map_id(%v: vector<2x1xf32>, %v1: vector<4x32xf32>, %id : index) {
- // expected-error@+1 {{'vector.insert_map' op expected number of ids must match the number of dimensions distributed}}
- %0 = vector.insert_map %v, %v1[%id] : vector<2x1xf32> into vector<4x32xf32>
-}
-
-// -----
-
func.func @scan_reduction_dim_constraint(%arg0: vector<2x3xi32>, %arg1: vector<3xi32>) -> vector<3xi32> {
// expected-error@+1 {{'vector.scan' op reduction dimension 5 has to be less than 2}}
%0:2 = vector.scan <add>, %arg0, %arg1 {inclusive = true, reduction_dim = 5} :
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index e026965eae648..c74a5903a2fbe 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -754,21 +754,6 @@ func.func @expand_and_compress2d(%base: memref<?x?xf32>, %mask: vector<16xi1>, %
return
}
-// CHECK-LABEL: @extract_insert_map
-func.func @extract_insert_map(%v: vector<32xf32>, %v2: vector<16x32xf32>,
- %id0 : index, %id1 : index) -> (vector<32xf32>, vector<16x32xf32>) {
- // CHECK: %[[V:.*]] = vector.extract_map %{{.*}}[%{{.*}}] : vector<32xf32> to vector<2xf32>
- %vd = vector.extract_map %v[%id0] : vector<32xf32> to vector<2xf32>
- // CHECK: %[[V1:.*]] = vector.extract_map %{{.*}}[%{{.*}}, %{{.*}}] : vector<16x32xf32> to vector<4x2xf32>
- %vd2 = vector.extract_map %v2[%id0, %id1] : vector<16x32xf32> to vector<4x2xf32>
- // CHECK: %[[R:.*]] = vector.insert_map %[[V]], %{{.*}}[%{{.*}}] : vector<2xf32> into vector<32xf32>
- %r = vector.insert_map %vd, %v[%id0] : vector<2xf32> into vector<32xf32>
- // CHECK: %[[R1:.*]] = vector.insert_map %[[V1]], %{{.*}}[%{{.*}}, %{{.*}}] : vector<4x2xf32> into vector<16x32xf32>
- %r2 = vector.insert_map %vd2, %v2[%id0, %id1] : vector<4x2xf32> into vector<16x32xf32>
- // CHECK: return %[[R]], %[[R1]] : vector<32xf32>, vector<16x32xf32>
- return %r, %r2 : vector<32xf32>, vector<16x32xf32>
-}
-
// CHECK-LABEL: @multi_reduction
func.func @multi_reduction(%0: vector<4x8x16x32xf32>, %acc0: vector<4x16xf32>,
%acc1: f32) -> f32 {
diff --git a/mlir/test/Dialect/Vector/vector-distribution.mlir b/mlir/test/Dialect/Vector/vector-distribution.mlir
deleted file mode 100644
index 2475e30fdbe8b..0000000000000
--- a/mlir/test/Dialect/Vector/vector-distribution.mlir
+++ /dev/null
@@ -1,204 +0,0 @@
-// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,1,32 -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-vector-distribute-patterns=distribution-multiplicity=32,4 -split-input-file | FileCheck %s --check-prefix=CHECK2D
-
-// CHECK-LABEL: func @distribute_vector_add
-// CHECK-SAME: (%[[ID:.*]]: index
-// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<32xf32>
-// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
-// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
-// CHECK-NEXT: return %[[INS]] : vector<32xf32>
-func.func @distribute_vector_add(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
- %0 = arith.addf %A, %B : vector<32xf32>
- return %0: vector<32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @distribute_vector_add_exp
-// CHECK-SAME: (%[[ID:.*]]: index
-// CHECK-NEXT: %[[EXPV:.*]] = math.exp %{{.*}} : vector<32xf32>
-// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %[[EXPV]], %{{.*}} : vector<32xf32>
-// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-// CHECK-NEXT: %[[EXC:.*]] = math.exp %[[EXA]] : vector<1xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID]]] : vector<32xf32> to vector<1xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXC]], %[[EXB]] : vector<1xf32>
-// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID]]] : vector<1xf32> into vector<32xf32>
-// CHECK-NEXT: return %[[INS]] : vector<32xf32>
-func.func @distribute_vector_add_exp(%id : index, %A: vector<32xf32>, %B: vector<32xf32>) -> vector<32xf32> {
- %C = math.exp %A : vector<32xf32>
- %0 = arith.addf %C, %B : vector<32xf32>
- return %0: vector<32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @vector_add_read_write
-// CHECK-SAME: (%[[ID:.*]]: index
-// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<1xf32>
-// CHECK-NEXT: %[[EXC:.*]] = vector.transfer_read %{{.*}}[%[[ID]]], %{{.*}} : memref<32xf32>, vector<1xf32>
-// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[ADD1]], %[[EXC]] : vector<1xf32>
-// CHECK-NEXT: vector.transfer_write %[[ADD2]], %{{.*}}[%[[ID]]] {{.*}} : vector<1xf32>, memref<32xf32>
-// CHECK-NEXT: return
-func.func @vector_add_read_write(%id : index, %A: memref<32xf32>, %B: memref<32xf32>, %C: memref<32xf32>, %D: memref<32xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0], %cf0: memref<32xf32>, vector<32xf32>
- %b = vector.transfer_read %B[%c0], %cf0: memref<32xf32>, vector<32xf32>
- %acc = arith.addf %a, %b: vector<32xf32>
- %c = vector.transfer_read %C[%c0], %cf0: memref<32xf32>, vector<32xf32>
- %d = arith.addf %acc, %c: vector<32xf32>
- vector.transfer_write %d, %D[%c0]: vector<32xf32>, memref<32xf32>
- return
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-
-// CHECK: func @vector_add_cycle
-// CHECK-SAME: (%[[ID:.*]]: index
-// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]]], %{{.*}} : memref<64xf32>, vector<2xf32>
-// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]]], %{{.*}} : memref<64xf32>, vector<2xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2xf32>
-// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID]]]
-// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]]] {{.*}} : vector<2xf32>, memref<64xf32>
-// CHECK-NEXT: return
-func.func @vector_add_cycle(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<64xf32>
- %b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<64xf32>
- %acc = arith.addf %a, %b: vector<64xf32>
- vector.transfer_write %acc, %C[%c0]: vector<64xf32>, memref<64xf32>
- return
-}
-
-// -----
-
-// Negative test to make sure nothing is done in case the vector size is not a
-// multiple of multiplicity.
-// CHECK-LABEL: func @vector_negative_test
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[C0]]], %{{.*}} : memref<64xf32>, vector<16xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<16xf32>
-// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]]] {{.*}} : vector<16xf32>, memref<64xf32>
-// CHECK-NEXT: return
-func.func @vector_negative_test(%id : index, %A: memref<64xf32>, %B: memref<64xf32>, %C: memref<64xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0], %cf0: memref<64xf32>, vector<16xf32>
- %b = vector.transfer_read %B[%c0], %cf0: memref<64xf32>, vector<16xf32>
- %acc = arith.addf %a, %b: vector<16xf32>
- vector.transfer_write %acc, %C[%c0]: vector<16xf32>, memref<64xf32>
- return
-}
-
-// -----
-
-// CHECK-LABEL: func @distribute_vector_add_3d
-// CHECK-SAME: (%[[ID0:.*]]: index, %[[ID1:.*]]: index
-// CHECK-NEXT: %[[ADDV:.*]] = arith.addf %{{.*}}, %{{.*}} : vector<64x4x32xf32>
-// CHECK-NEXT: %[[EXA:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.extract_map %{{.*}}[%[[ID0]], %[[ID1]]] : vector<64x4x32xf32> to vector<2x4x1xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-// CHECK-NEXT: %[[INS:.*]] = vector.insert_map %[[ADD]], %[[ADDV]][%[[ID0]], %[[ID1]]] : vector<2x4x1xf32> into vector<64x4x32xf32>
-// CHECK-NEXT: return %[[INS]] : vector<64x4x32xf32>
-func.func @distribute_vector_add_3d(%id0 : index, %id1 : index,
- %A: vector<64x4x32xf32>, %B: vector<64x4x32xf32>) -> vector<64x4x32xf32> {
- %0 = arith.addf %A, %B : vector<64x4x32xf32>
- return %0: vector<64x4x32xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-
-// CHECK: func @vector_add_transfer_3d
-// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[ID1:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[ID1]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
-// CHECK-NEXT: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID2]], %[[C0]], %[[ID_1]]], %{{.*}} : memref<64x64x64xf32>, vector<2x4x1xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[ID3]], %[[C0]], %[[ID_1]]] {{.*}} : vector<2x4x1xf32>, memref<64x64x64xf32>
-// CHECK-NEXT: return
-func.func @vector_add_transfer_3d(%id0 : index, %id1 : index, %A: memref<64x64x64xf32>,
- %B: memref<64x64x64xf32>, %C: memref<64x64x64xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
- %b = vector.transfer_read %B[%c0, %c0, %c0], %cf0: memref<64x64x64xf32>, vector<64x4x32xf32>
- %acc = arith.addf %a, %b: vector<64x4x32xf32>
- vector.transfer_write %acc, %C[%c0, %c0, %c0]: vector<64x4x32xf32>, memref<64x64x64xf32>
- return
-}
-
-// -----
-
-#map0 = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
-#map1 = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
-#map2 = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, 0, 0)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (0, d3, d0)>
-// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
-
-// CHECK: func @vector_add_transfer_permutation
-// CHECK-SAME: (%[[ID_0:.*]]: index, %[[ID_1:.*]]: index
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[ID2:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-// CHECK-NEXT: %[[EXA:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[ID2]]], %{{.*}} {permutation_map = #[[MAP1]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
-// CHECK-NEXT: %[[EXB:.*]] = vector.transfer_read %{{.*}}[%[[ID_0]], %[[C0]], %[[C0]], %[[C0]]], %{{.*}} {permutation_map = #[[MAP2]]} : memref<?x?x?x?xf32>, vector<2x4x1xf32>
-// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[EXA]], %[[EXB]] : vector<2x4x1xf32>
-// CHECK-NEXT: %[[ID3:.*]] = affine.apply #[[MAP0]]()[%[[ID_0]]]
-// CHECK-NEXT: vector.transfer_write %[[ADD]], %{{.*}}[%[[C0]], %[[ID_1]], %[[C0]], %[[ID3]]] {permutation_map = #[[MAP3]]} : vector<2x4x1xf32>, memref<?x?x?x?xf32>
-// CHECK-NEXT: return
-func.func @vector_add_transfer_permutation(%id0 : index, %id1 : index, %A: memref<?x?x?x?xf32>,
- %B: memref<?x?x?x?xf32>, %C: memref<?x?x?x?xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map0} : memref<?x?x?x?xf32>, vector<64x4x32xf32>
- %b = vector.transfer_read %B[%c0, %c0, %c0, %c0], %cf0 {permutation_map = #map1}: memref<?x?x?x?xf32>, vector<64x4x32xf32>
- %acc = arith.addf %a, %b: vector<64x4x32xf32>
- vector.transfer_write %acc, %C[%c0, %c0, %c0, %c0] {permutation_map = #map2}: vector<64x4x32xf32>, memref<?x?x?x?xf32>
- return
-}
-
-// -----
-
-// CHECK2D-LABEL: vector_add_contract
-// CHECK2D: %[[A:.+]] = vector.transfer_read %arg2[%0, %c0], %cst : memref<?x?xf32>, vector<2x4xf32>
-// CHECK2D: %[[B:.+]] = vector.transfer_read %arg3[%2, %c0], %cst : memref<?x?xf32>, vector<16x4xf32>
-// CHECK2D: %[[C:.+]] = vector.transfer_read %arg4[%4, %5], %cst : memref<?x?xf32>, vector<2x16xf32>
-// CHECK2D: %[[E:.+]] = vector.transfer_read %arg5[%7, %8], %cst : memref<?x?xf32>, vector<2x16xf32>
-// CHECK2D: %[[D:.+]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]] : vector<2x4xf32>, vector<16x4xf32> into vector<2x16xf32>
-// CHECK2D: %[[R:.+]] = arith.addf %[[D]], %[[E]] : vector<2x16xf32>
-// CHECK2D: vector.transfer_write %[[R]], {{.*}} : vector<2x16xf32>, memref<?x?xf32>
-func.func @vector_add_contract(%id0 : index, %id1 : index, %A: memref<?x?xf32>,
- %B: memref<?x?xf32>, %C: memref<?x?xf32>, %D: memref<?x?xf32>) {
- %c0 = arith.constant 0 : index
- %cf0 = arith.constant 0.0 : f32
- %a = vector.transfer_read %A[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
- %b = vector.transfer_read %B[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x4xf32>
- %c = vector.transfer_read %C[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
- %d = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
- affine_map<(d0, d1, d2) -> (d1, d2)>,
- affine_map<(d0, d1, d2) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel", "reduction"],
- kind = #vector.kind<add>}
- %a, %b, %c : vector<64x4xf32>, vector<64x4xf32> into vector<64x64xf32>
- %e = vector.transfer_read %D[%c0, %c0], %cf0 : memref<?x?xf32>, vector<64x64xf32>
- %r = arith.addf %d, %e : vector<64x64xf32>
- vector.transfer_write %r, %C[%c0, %c0] : vector<64x64xf32>, memref<?x?xf32>
- return
-}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
deleted file mode 100644
index 95fd387fca959..0000000000000
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop,convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \
-// RUN: mlir-cpu-runner -e main -entry-point-result=void \
-// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main \
-// RUN: -entry-point-result=void \
-// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
-// RUN: mlir-opt %s -pass-pipeline="func.func(test-vector-to-forloop)" | FileCheck %s -check-prefix=TRANSFORM
-
-
-func.func private @printMemrefF32(memref<*xf32>)
-
-func.func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref<?xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %0 = memref.alloc(%arg0) : memref<?xf32>
- scf.for %arg2 = %c0 to %arg0 step %c1 {
- %tmp = arith.index_cast %arg2 : index to i32
- %tmp1 = arith.sitofp %tmp : i32 to f32
- %tmp2 = arith.addf %tmp1, %arg1 : f32
- memref.store %tmp2, %0[%arg2] : memref<?xf32>
- }
- return %0 : memref<?xf32>
-}
-
-// Large vector addf that can be broken down into a loop of smaller vector addf.
-func.func @main() {
- %cf0 = arith.constant 0.0 : f32
- %cf1 = arith.constant 1.0 : f32
- %cf2 = arith.constant 2.0 : f32
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c32 = arith.constant 32 : index
- %c64 = arith.constant 64 : index
- %out = memref.alloc(%c64) : memref<?xf32>
- %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref<?xf32>
- %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref<?xf32>
- // Check that the tansformatio correctly happened.
- // TRANSFORM: scf.for
- // TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
- // TRANSFORM: vector.transfer_read {{.*}} : memref<?xf32>, vector<2xf32>
- // TRANSFORM: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : vector<2xf32>
- // TRANSFORM: vector.transfer_write {{.*}} : vector<2xf32>, memref<?xf32>
- // TRANSFORM: }
- %a = vector.transfer_read %in1[%c0], %cf0: memref<?xf32>, vector<64xf32>
- %b = vector.transfer_read %in2[%c0], %cf0: memref<?xf32>, vector<64xf32>
- %acc = arith.addf %a, %b: vector<64xf32>
- vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref<?xf32>
- %converted = memref.cast %out : memref<?xf32> to memref<*xf32>
- call @printMemrefF32(%converted): (memref<*xf32>) -> ()
- // CHECK: Unranked{{.*}}data =
- // CHECK: [
- // CHECK-SAME: 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
- // CHECK-SAME: 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51,
- // CHECK-SAME: 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75,
- // CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,
- // CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119,
- // CHECK-SAME: 121, 123, 125, 127, 129]
- memref.dealloc %out : memref<?xf32>
- memref.dealloc %in1 : memref<?xf32>
- memref.dealloc %in2 : memref<?xf32>
- return
-}
diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
index eec8de0094477..37a4ea350bc4f 100644
--- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
@@ -364,126 +364,6 @@ struct TestVectorUnrollingPatterns
llvm::cl::init(false)};
};
-struct TestVectorDistributePatterns
- : public PassWrapper<TestVectorDistributePatterns,
- OperationPass<func::FuncOp>> {
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorDistributePatterns)
-
- StringRef getArgument() const final {
- return "test-vector-distribute-patterns";
- }
- StringRef getDescription() const final {
- return "Test lowering patterns to distribute vector ops in the vector "
- "dialect";
- }
- TestVectorDistributePatterns() = default;
- TestVectorDistributePatterns(const TestVectorDistributePatterns &pass)
- : PassWrapper(pass) {}
- void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<VectorDialect>();
- registry.insert<AffineDialect>();
- }
- ListOption<int32_t> multiplicity{
- *this, "distribution-multiplicity",
- llvm::cl::desc("Set the multiplicity used for distributing vector")};
-
- void runOnOperation() override {
- MLIRContext *ctx = &getContext();
- RewritePatternSet patterns(ctx);
- func::FuncOp func = getOperation();
- func.walk([&](arith::AddFOp op) {
- OpBuilder builder(op);
- if (auto vecType = op.getType().dyn_cast<VectorType>()) {
- SmallVector<int64_t, 2> mul;
- SmallVector<AffineExpr, 2> perm;
- SmallVector<Value, 2> ids;
- unsigned count = 0;
- // Remove the multiplicity of 1 and calculate the affine map based on
- // the multiplicity.
- SmallVector<int32_t, 4> m(multiplicity.begin(), multiplicity.end());
- for (unsigned i = 0, e = vecType.getRank(); i < e; i++) {
- if (i < m.size() && m[i] != 1 && vecType.getDimSize(i) % m[i] == 0) {
- mul.push_back(m[i]);
- ids.push_back(func.getArgument(count++));
- perm.push_back(getAffineDimExpr(i, ctx));
- }
- }
- auto map = AffineMap::get(op.getType().cast<VectorType>().getRank(), 0,
- perm, ctx);
- Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
- builder, op.getOperation(), ids, mul, map);
- if (ops) {
- SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
- op.getResult().replaceAllUsesExcept(ops->insert.getResult(),
- extractOp);
- }
- }
- });
- populatePropagateVectorDistributionPatterns(patterns);
- (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
- }
-};
-
-struct TestVectorToLoopPatterns
- : public PassWrapper<TestVectorToLoopPatterns,
- OperationPass<func::FuncOp>> {
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorToLoopPatterns)
-
- StringRef getArgument() const final { return "test-vector-to-forloop"; }
- StringRef getDescription() const final {
- return "Test lowering patterns to break up a vector op into a for loop";
- }
- TestVectorToLoopPatterns() = default;
- TestVectorToLoopPatterns(const TestVectorToLoopPatterns &pass)
- : PassWrapper(pass) {}
- void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<VectorDialect>();
- registry.insert<AffineDialect>();
- }
- Option<int32_t> multiplicity{
- *this, "distribution-multiplicity",
- llvm::cl::desc("Set the multiplicity used for distributing vector"),
- llvm::cl::init(32)};
- void runOnOperation() override {
- MLIRContext *ctx = &getContext();
- RewritePatternSet patterns(ctx);
- func::FuncOp func = getOperation();
- func.walk([&](arith::AddFOp op) {
- // Check that the operation type can be broken down into a loop.
- VectorType type = op.getType().dyn_cast<VectorType>();
- if (!type || type.getRank() != 1 ||
- type.getNumElements() % multiplicity != 0)
- return mlir::WalkResult::advance();
- auto filterAlloc = [](Operation *op) {
- return !isa<arith::ConstantOp, memref::AllocOp, func::CallOp>(op);
- };
- auto dependentOps = getSlice(op, filterAlloc);
- // Create a loop and move instructions from the Op slice into the loop.
- OpBuilder builder(op);
- auto zero = builder.create<arith::ConstantIndexOp>(op.getLoc(), 0);
- auto one = builder.create<arith::ConstantIndexOp>(op.getLoc(), 1);
- auto numIter =
- builder.create<arith::ConstantIndexOp>(op.getLoc(), multiplicity);
- auto forOp = builder.create<scf::ForOp>(op.getLoc(), zero, numIter, one);
- for (Operation *it : dependentOps) {
- it->moveBefore(forOp.getBody()->getTerminator());
- }
- auto map = AffineMap::getMultiDimIdentityMap(1, ctx);
- // break up the original op and let the patterns propagate.
- Optional<mlir::vector::DistributeOps> ops = distributPointwiseVectorOp(
- builder, op.getOperation(), {forOp.getInductionVar()}, {multiplicity},
- map);
- if (ops) {
- SmallPtrSet<Operation *, 1> extractOp({ops->extract, ops->insert});
- op.getResult().replaceAllUsesExcept(ops->insert.getResult(), extractOp);
- }
- return mlir::WalkResult::interrupt();
- });
- populatePropagateVectorDistributionPatterns(patterns);
- (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
- }
-};
-
struct TestVectorTransferUnrollingPatterns
: public PassWrapper<TestVectorTransferUnrollingPatterns,
OperationPass<func::FuncOp>> {
@@ -918,10 +798,6 @@ void registerTestVectorLowerings() {
PassRegistration<TestVectorTransferFullPartialSplitPatterns>();
- PassRegistration<TestVectorDistributePatterns>();
-
- PassRegistration<TestVectorToLoopPatterns>();
-
PassRegistration<TestVectorTransferOpt>();
PassRegistration<TestVectorTransferLoweringPatterns>();
More information about the Mlir-commits
mailing list