[Mlir-commits] [mlir] 8e83d5e - [mlir] Remove old VectorOps directory
River Riddle
llvmlistbot at llvm.org
Wed Mar 25 12:22:09 PDT 2020
Author: River Riddle
Date: 2020-03-25T12:21:57-07:00
New Revision: 8e83d5ea3e03e90d63f89e9f2fb7000e9b024b53
URL: https://github.com/llvm/llvm-project/commit/8e83d5ea3e03e90d63f89e9f2fb7000e9b024b53
DIFF: https://github.com/llvm/llvm-project/commit/8e83d5ea3e03e90d63f89e9f2fb7000e9b024b53.diff
LOG: [mlir] Remove old VectorOps directory
This was accidentally re-added during a rebase.
Added:
Modified:
Removed:
mlir/include/mlir/Dialect/VectorOps/VectorOps.td
mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
mlir/lib/Dialect/VectorOps/VectorUtils.cpp
################################################################################
diff --git a/mlir/include/mlir/Dialect/VectorOps/VectorOps.td b/mlir/include/mlir/Dialect/VectorOps/VectorOps.td
deleted file mode 100644
index ef1fde00e5ec..000000000000
--- a/mlir/include/mlir/Dialect/VectorOps/VectorOps.td
+++ /dev/null
@@ -1,1402 +0,0 @@
-//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Defines MLIR vector operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef VECTOR_OPS
-#define VECTOR_OPS
-
-include "mlir/Dialect/Affine/IR/AffineOpsBase.td"
-include "mlir/Interfaces/SideEffects.td"
-
-
-def VectorOps_Dialect : Dialect {
- let name = "vector";
- let cppNamespace = "vector";
- let hasConstantMaterializer = 1;
-}
-
-// Base class for Vector dialect ops.
-class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
- Op<VectorOps_Dialect, mnemonic, traits> {
- // For every vector op, the following functions must be defined:
- // * void print(OpAsmPrinter &p, ${C++ class of Op} op)
- // * LogicalResult verify(${C++ class of Op} op)
- // * ParseResult parse${C++ class of Op}(OpAsmParser &parser,
- // OperationState &result)
- let printer = [{ return ::print(p, *this); }];
- let verifier = [{ return ::verify(*this); }];
- let parser = [{ return ::parse$cppClass(parser, result); }];
-}
-
-// TODO(andydavis, ntv) Add an attribute to specify a different algebra
-// with operators other than the current set: {*, +}.
-def Vector_ContractionOp :
- Vector_Op<"contract", [NoSideEffect,
- PredOpTrait<"first operand lhs and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>,
- PredOpTrait<"second operand rhs and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 1>>,
- PredOpTrait<"third operand acc and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 2>>]>,
- Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc,
- Variadic<VectorOf<[I1]>>:$masks,
- AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types)>,
- Results<(outs AnyType)> {
- let summary = "vector contraction operation";
- let description = [{
- Computes the sum of products of vector elements along contracting
- dimension pairs from 2 vectors of rank M and N respectively, adds this
- intermediate result to the accumulator argument of rank K, and returns a
- vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
- num_batch_dims (see dimension type descriptions below)). For K = 0 (no
- free or batch dimensions), the accumulator and output are scalars.
-
- Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp)
- specify the dynamic dimension sizes of valid data within the lhs/rhs vector
- arguments.
-
- An iterator type attribute list must be specified, where each element of
- the list represents an iterator with one of the following types:
-
- *) "reduction": reduction dimensions are present in the lhs and rhs
- arguments but not in the output (and accumulator
- argument). These are the dimensions along which the vector
- contraction op computes the sum of products, and
- contracting dimension pair dimension sizes must match
- between lhs/rhs.
- *) "parallel": Batch dimensions are iterator type "parallel", and
- are non-contracting dimensions present in the lhs, rhs and
- output. The lhs/rhs co-iterate along the batch dimensions,
- which should be expressed in their indexing maps.
-
- Free dimensions are iterator type "parallel", and are
- non-contraction, non-batch dimensions accessed by either the
- lhs or rhs (but not both). The lhs and rhs free dimensions
- are unrelated to each other and do not co-iterate, which
- should be expressed in their indexing maps.
-
- An indexing map attribute list must be specified with an entry for lhs, rhs
- and acc arguments. An indexing map attribute specifies a mapping from each
- iterator in the iterator type list to each dimension of an N-D vector.
-
- Examples:
-
- // Simple dot product (K = 0).
- #contraction_accesses = [
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> ()>
- ]
- #contraction_trait = {
- indexing_maps = #contraction_accesses,
- iterator_types = ["reduction"]
- }
- %3 = vector.contract #contraction_trait %0, %1, %2
- : vector<10xf32>, vector<10xf32> into f32
-
- // 2D vector contraction with one contracting dimension (matmul, K = 2).
- #contraction_accesses = [
- affine_map<(i, j, k) -> (i, k)>,
- affine_map<(i, j, k) -> (k, j)>,
- affine_map<(i, j, k) -> (i, j)>
- ]
- #contraction_trait = {
- indexing_maps = #contraction_accesses,
- iterator_types = ["parallel", "parallel", "reduction"]
- }
-
- %3 = vector.contract #contraction_trait %0, %1, %2
- : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>
-
- // 4D to 3D vector contraction with two contracting dimensions and
- // one batch dimension (K = 3).
- #contraction_accesses = [
- affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
- affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
- affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
- ]
- #contraction_trait = {
- indexing_maps = #contraction_accesses,
- iterator_types = ["parallel", "parallel", "parallel",
- "reduction", "reduction"]
- }
-
- %4 = vector.contract #contraction_trait %0, %1, %2
- : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
-
- // 4D vector contraction with two contracting dimensions and optional
- // vector mask arguments.
- %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
- %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>
-
- %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
- : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
- }];
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value lhs, Value rhs, "
- "Value acc, ArrayAttr indexingMaps, ArrayAttr iteratorTypes">,
- OpBuilder<
- "Builder *builder, OperationState &result, Value lhs, Value rhs, "
- "Value acc, ArrayRef<ArrayRef<AffineExpr>> indexingExprs, "
- "ArrayRef<StringRef> iteratorTypes">];
- let extraClassDeclaration = [{
- VectorType getLhsType() {
- return lhs().getType().cast<VectorType>();
- }
- VectorType getRhsType() {
- return rhs().getType().cast<VectorType>();
- }
- Type getAccType() { return acc().getType(); }
- VectorType getLHSVectorMaskType() {
- if (llvm::size(masks()) != 2) return VectorType();
- return getOperand(3).getType().cast<VectorType>();
- }
- VectorType getRHSVectorMaskType() {
- if (llvm::size(masks()) != 2) return VectorType();
- return getOperand(4).getType().cast<VectorType>();
- }
- Type getResultType() { return getResult().getType(); }
- ArrayRef<StringRef> getTraitAttrNames();
- SmallVector<AffineMap, 4> getIndexingMaps();
- static unsigned getAccOperandIndex() { return 2; }
-
- // Returns the bounds of each dimension in the iteration space spanned
- // by the iterator types of this operation.
- void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);
-
- // Returns a list of index maps, where there is a list entry for each
- // op indexing map attribute (i.e. one for each input and output, with
- // the output listed last). Each index map maps from this operation's
- // iteration space to the vector dimensions of the map's input/output.
- void getIterationIndexMap(
- std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);
-
- std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
- std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();
- }];
-}
-
-def Vector_ReductionOp :
- Vector_Op<"reduction", [NoSideEffect,
- PredOpTrait<"source operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>]>,
- Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>,
- Results<(outs AnyType:$dest)> {
- let summary = "reduction operation";
- let description = [{
- Reduces a 1-D vector "horizontally" into a scalar using the given
- operation (add/mul/min/max for int/fp and and/or/xor for int only).
- Some reductions (add/mul for fp) also allow an optional fused
- accumulator.
-
- Note that these operations are restricted to 1-D vectors to remain
- close to the corresponding LLVM intrinsics:
-
- http://llvm.org/docs/LangRef.html#experimental-vector-reduction-intrinsics
-
- Examples:
- ```
- %1 = vector.reduction "add", %0 : vector<16xf32> into f32
-
- %3 = vector.reduction "xor", %2 : vector<4xi32> into i32
-
- %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
- ```
- }];
- let extraClassDeclaration = [{
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
-}
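The "min" and "max" kinds mentioned above take the same form; a minimal sketch, reusing `%0` and `%2` from the examples:

```mlir
%5 = vector.reduction "min", %0 : vector<16xf32> into f32
%6 = vector.reduction "max", %2 : vector<4xi32> into i32
```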
-
-def Vector_BroadcastOp :
- Vector_Op<"broadcast", [NoSideEffect,
- PredOpTrait<"source operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>]>,
- Arguments<(ins AnyType:$source)>,
- Results<(outs AnyVector:$vector)> {
- let summary = "broadcast operation";
- let description = [{
- Broadcasts the scalar or k-D vector value in the source operand
- to an n-D result vector such that the broadcast makes sense, i.e.,
- the source operand is duplicated to match the given rank and sizes
- in the result vector. The legality rules are:
- * the source operand must have the same element type as the result type
- * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
- an n-D vector <t_1 x .. x t_n x type> if
- * k <= n, and
- * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
- match exactly as s_j = t_i or s_j = 1:
- ```
- t_1 x .. t_n-k x t_n-k+1 x .. x t_i x .. x t_n
- s_1 x .. x s_j x .. x s_k
- <duplication> <potential stretch>
- ```
- The source operand is duplicated over all the missing leading dimensions
- and stretched over the trailing dimensions where the source has a non-equal
- dimension of 1. These rules imply that any scalar broadcast (k=0) to any
- shaped vector with the same element type is always legal.
-
- Examples:
- ```
- %0 = constant 0.0 : f32
- %1 = vector.broadcast %0 : f32 to vector<16xf32>
- %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
- ```
- }];
- let extraClassDeclaration = [{
- Type getSourceType() { return source().getType(); }
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
- let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
-}
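The stretch rule (s_j = 1) is not exercised by the examples above; a minimal sketch, assuming a value `%3` of the stated source type:

```mlir
// The size-1 trailing dimension is stretched to 16; the leading
// dimension (4) must match exactly.
%4 = vector.broadcast %3 : vector<4x1xf32> to vector<4x16xf32>
```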
-
-def Vector_ShuffleOp :
- Vector_Op<"shuffle", [NoSideEffect,
- PredOpTrait<"first operand v1 and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>,
- PredOpTrait<"second operand v2 and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 1>>]>,
- Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>,
- Results<(outs AnyVector:$vector)> {
- let summary = "shuffle operation";
- let description = [{
- The shuffle operation constructs a permutation (or duplication) of elements
- from two input vectors, returning a vector with the same element type as
- the input and a length that is the same as the shuffle mask. The two input
- vectors must have the same element type, rank, and trailing dimension
- sizes; their values are shuffled in the leading dimension (which may
- differ in size) according to the given mask. The legality rules are:
- * the two operands must have the same element type as the result
- * the two operands and the result must have the same rank and trailing
- dimension sizes, viz. given two k-D operands
- v1 : <s_1 x s_2 x .. x s_k x type> and
- v2 : <t_1 x t_2 x .. x t_k x type>
- we have s_i = t_i for all 1 < i <= k
- * the mask length equals the leading dimension size of the result
- * numbering the input vector indices left to right across the operands, all
- mask values must be within range, viz. given two k-D operands v1 and v2
- above, all mask values are in the range [0,s_1+t_1)
-
- Examples:
- ```
- %0 = vector.shuffle %a, %b[0, 3]
- : vector<2xf32>, vector<2xf32> ; yields vector<2xf32>
- %1 = vector.shuffle %c, %b[0, 1, 2]
- : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
- %2 = vector.shuffle %a, %b[3, 2, 1, 0]
- : vector<2xf32>, vector<2xf32> ; yields vector<4xf32>
-
- ```
- }];
- let builders = [OpBuilder<"Builder *builder, OperationState &result,"
- "Value v1, Value v2, ArrayRef<int64_t>">];
- let extraClassDeclaration = [{
- static StringRef getMaskAttrName() { return "mask"; }
- VectorType getV1VectorType() {
- return v1().getType().cast<VectorType>();
- }
- VectorType getV2VectorType() {
- return v2().getType().cast<VectorType>();
- }
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
-}
-
-def Vector_ExtractElementOp :
- Vector_Op<"extractelement", [NoSideEffect,
- TypesMatchWith<"result type matches element type of vector operand",
- "vector", "result",
- "$_self.cast<ShapedType>().getElementType()">]>,
- Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>,
- Results<(outs AnyType:$result)> {
- let summary = "extractelement operation";
- let description = [{
- Takes a 1-D vector and a dynamic index position and extracts the
- scalar at that position. Note that this instruction resembles
- vector.extract, but is restricted to 1-D vectors and relaxed
- to dynamic indices. It is meant to be closer to LLVM's version:
- https://llvm.org/docs/LangRef.html#extractelement-instruction
-
- Example:
- ```
- %c = constant 15 : i32
- %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
- ```
- }];
- let extraClassDeclaration = [{
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
-
- let assemblyFormat = [{
- $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector)
- }];
-}
-
-def Vector_ExtractOp :
- Vector_Op<"extract", [NoSideEffect,
- PredOpTrait<"operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>]>,
- Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>,
- Results<(outs AnyType)> {
- let summary = "extract operation";
- let description = [{
- Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
- the proper position. Degenerates to an element type in the 0-D case.
-
- Examples:
- ```
- %1 = vector.extract %0[3]: vector<4x8x16xf32>
- %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
- ```
- }];
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value source,"
- "ArrayRef<int64_t>">];
- let extraClassDeclaration = [{
- static StringRef getPositionAttrName() { return "position"; }
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
-}
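Applying the (n-k)-D rule, the examples above yield the following result types:

```mlir
%1 = vector.extract %0[3]: vector<4x8x16xf32>        // yields vector<8x16xf32>
%2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>  // yields f32
```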
-
-def Vector_ExtractSlicesOp :
- Vector_Op<"extract_slices", [NoSideEffect]>,
- Arguments<(ins AnyVector:$vector, I64ArrayAttr:$sizes,
- I64ArrayAttr:$strides)>,
- Results<(outs TupleOf<[AnyVector]>)> {
- let summary = "vector extract slices operation";
- let description = [{
- Takes an N-D vector and returns a tuple of vector slices of 'vector',
- based on the 'sizes' and 'strides' parameters.
-
- The arguments 'sizes' and 'strides' specify how to unroll the shape of
- 'vector': all slices have shape 'sizes', except for slices at dimension
- boundaries when the dimension sizes of 'vector' are not a multiple of
- 'sizes'.
-
- Each slice is returned at the tuple element index corresponding to the
- linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
- Currently, only unit strides are supported.
-
- Examples:
- ```
- %0 = vector.transfer_read ...: vector<4x2xf32>
-
- %1 = vector.extract_slices %0, [2, 2], [1, 1]
- : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
-
- // Example with partial slices at dimension boundaries.
- %2 = vector.transfer_read ...: vector<4x3xf32>
-
- %3 = vector.extract_slices %2, [2, 2], [1, 1]
- : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
- vector<2x2xf32>, vector<2x1xf32>>
- ```
- }];
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, TupleType tupleType, " #
- "Value vector, ArrayRef<int64_t> sizes, " #
- "ArrayRef<int64_t> strides">];
- let extraClassDeclaration = [{
- VectorType getSourceVectorType() {
- return vector().getType().cast<VectorType>();
- }
- TupleType getResultTupleType() {
- return getResult().getType().cast<TupleType>();
- }
- void getSizes(SmallVectorImpl<int64_t> &results);
- void getStrides(SmallVectorImpl<int64_t> &results);
- static StringRef getSizesAttrName() { return "sizes"; }
- static StringRef getStridesAttrName() { return "strides"; }
- }];
- let assemblyFormat = [{
- $vector `,` $sizes `,` $strides attr-dict `:` type($vector) `into`
- type(results)
- }];
-}
-
-def Vector_FMAOp :
- Op<VectorOps_Dialect, "fma", [NoSideEffect,
- AllTypesMatch<["lhs", "rhs", "acc", "result"]>]>,
- Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>,
- Results<(outs AnyVector:$result)> {
- let summary = "vector fused multiply-add";
- let description = [{
- Multiply-add expressions operate on n-D vectors and compute a fused
- pointwise multiply-and-accumulate: `$result = $lhs * $rhs + $acc`.
- All operands and result have the same vector type. The semantics
- of the operation correspond to those of the `llvm.fma`
- [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
- particular case of lowering to LLVM, this is guaranteed to lower
- to the `llvm.fma.*` intrinsic.
-
- Example:
-
- ```
- %3 = vector.fma %0, %1, %2: vector<8x16xf32>
- ```
- }];
- // Fully specified by traits.
- let verifier = ?;
- let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
- let builders = [OpBuilder<
- "Builder *b, OperationState &result, Value lhs, Value rhs, Value acc",
- "build(b, result, lhs.getType(), lhs, rhs, acc);">];
- let extraClassDeclaration = [{
- VectorType getVectorType() { return lhs().getType().cast<VectorType>(); }
- }];
-}
-
-def Vector_InsertElementOp :
- Vector_Op<"insertelement", [NoSideEffect,
- TypesMatchWith<"source operand type matches element type of result",
- "result", "source",
- "$_self.cast<ShapedType>().getElementType()">,
- AllTypesMatch<["dest", "result"]>]>,
- Arguments<(ins AnyType:$source, AnyVector:$dest,
- AnySignlessInteger:$position)>,
- Results<(outs AnyVector:$result)> {
- let summary = "insertelement operation";
- let description = [{
- Takes a scalar source, a 1-D destination vector and a dynamic index
- position and inserts the source into the destination at the proper
- position. Note that this instruction resembles vector.insert, but
- is restricted to 1-D vectors and relaxed to dynamic indices. It is
- meant to be closer to LLVM's version:
- https://llvm.org/docs/LangRef.html#insertelement-instruction
-
- Example:
- ```
- %c = constant 15 : i32
- %f = constant 0.0 : f32
- %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
- ```
- }];
- let extraClassDeclaration = [{
- Type getSourceType() { return source().getType(); }
- VectorType getDestVectorType() {
- return dest().getType().cast<VectorType>();
- }
- }];
-
- let assemblyFormat = [{
- $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:`
- type($result)
- }];
-}
-
-def Vector_InsertOp :
- Vector_Op<"insert", [NoSideEffect,
- PredOpTrait<"source operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>,
- AllTypesMatch<["dest", "res"]>]>,
- Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>,
- Results<(outs AnyVector:$res)> {
- let summary = "insert operation";
- let description = [{
- Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
- and inserts the n-D source into the (n+k)-D destination at the proper
- position. Degenerates to a scalar source type when n = 0.
-
- Examples:
- ```
- %2 = vector.insert %0, %1[3]:
- vector<8x16xf32> into vector<4x8x16xf32>
- %5 = vector.insert %3, %4[3, 3, 3]:
- f32 into vector<4x8x16xf32>
- ```
- }];
- let assemblyFormat = [{
- $source `,` $dest $position attr-dict `:` type($source) `into` type($dest)
- }];
-
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value source, " #
- "Value dest, ArrayRef<int64_t>">];
- let extraClassDeclaration = [{
- static StringRef getPositionAttrName() { return "position"; }
- Type getSourceType() { return source().getType(); }
- VectorType getDestVectorType() {
- return dest().getType().cast<VectorType>();
- }
- }];
-}
-
-def Vector_InsertSlicesOp :
- Vector_Op<"insert_slices", [NoSideEffect]>,
- Arguments<(ins TupleOf<[AnyVector]>:$vectors, I64ArrayAttr:$sizes,
- I64ArrayAttr:$strides)>,
- Results<(outs AnyVector)> {
- let summary = "vector insert slices operation";
- let description = [{
- Takes a tuple of vector slices and inserts them into the vector result
- according to the 'sizes' and 'strides' parameters.
-
- The arguments 'sizes' and 'strides' specify how to unroll the shape of
- the result vector: all slices have shape 'sizes', except for slices at
- dimension boundaries when the dimension sizes of the result are not a
- multiple of 'sizes'.
-
- Each slice in 'vectors' is at the tuple element index corresponding to the
- linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
- Currently, only unit strides are supported.
-
- Examples:
- ```
- %1 = vector.extract_slices %0, [2, 2], [1, 1]
- : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
-
- %2 = vector.insert_slices %1, [2, 2], [1, 1]
- : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32>
-
- // Example with partial slices at dimension boundaries.
- %3 = vector.extract_slices %2, [2, 2], [1, 1]
- : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
- vector<2x2xf32>, vector<2x1xf32>>
-
- %4 = vector.insert_slices %3, [2, 2], [1, 1]
- : tuple<vector<2x2xf32>, vector<2x1xf32>,
- vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32>
- ```
- }];
-
- let extraClassDeclaration = [{
- TupleType getSourceTupleType() {
- return vectors().getType().cast<TupleType>();
- }
- VectorType getResultVectorType() {
- return getResult().getType().cast<VectorType>();
- }
- void getSizes(SmallVectorImpl<int64_t> &results);
- void getStrides(SmallVectorImpl<int64_t> &results);
- static StringRef getSizesAttrName() { return "sizes"; }
- static StringRef getStridesAttrName() { return "strides"; }
- }];
- let assemblyFormat = [{
- $vectors `,` $sizes `,` $strides attr-dict `:` type($vectors) `into`
- type(results)
- }];
-}
-
-def Vector_InsertStridedSliceOp :
- Vector_Op<"insert_strided_slice", [NoSideEffect,
- PredOpTrait<"operand #0 and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>,
- AllTypesMatch<["dest", "res"]>]>,
- Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets,
- I64ArrayAttr:$strides)>,
- Results<(outs AnyVector:$res)> {
- let summary = "strided_slice operation";
- let description = [{
- Takes a k-D source vector, an n-D destination vector (n >= k), an n-sized
- `offsets` integer array attribute, and a k-sized `strides` integer array
- attribute, and inserts the k-D source vector as a strided subvector at the
- proper offset into the n-D destination vector.
-
- At the moment strides must contain only 1s.
-
- Returns an n-D vector that is a copy of the n-D destination vector in which
- the last k-D dimensions contain the k-D source vector elements strided at
- the proper location as specified by the offsets.
-
- Examples:
- ```
- %2 = vector.insert_strided_slice %0, %1
- {offsets = [0, 0, 2], strides = [1, 1]}:
- vector<2x4xf32> into vector<16x4x8xf32>
- ```
- }];
-
- let assemblyFormat = [{
- $source `,` $dest attr-dict `:` type($source) `into` type($dest)
- }];
-
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value source, Value dest, " #
- "ArrayRef<int64_t> offsets, ArrayRef<int64_t> strides">];
- let extraClassDeclaration = [{
- static StringRef getOffsetsAttrName() { return "offsets"; }
- static StringRef getStridesAttrName() { return "strides"; }
- VectorType getSourceVectorType() {
- return source().getType().cast<VectorType>();
- }
- VectorType getDestVectorType() {
- return dest().getType().cast<VectorType>();
- }
- }];
-}
-
-def Vector_OuterProductOp :
- Vector_Op<"outerproduct", [NoSideEffect, SameOperandsAndResultElementType]>,
- Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, Variadic<AnyVector>:$acc)>,
- Results<(outs AnyVector)> {
- let summary = "vector outerproduct with optional fused add";
- let description = [{
- Takes two 1-D vectors and returns the 2-D vector containing the outer product.
-
- An optional extra 2-D vector argument may be specified in which case the
- operation returns the sum of the outer-product and the extra vector. In this
- multiply-accumulate scenario, the rounding mode is the one obtained by
- guaranteeing that a fused multiply-add operation is emitted. When lowered to
- the LLVMIR dialect, this form emits `llvm.intr.fma`, which is guaranteed to
- lower to actual `fma` instructions on x86.
-
- Examples:
-
- ```
- %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
- return %2: vector<4x8xf32>
-
- %3 = vector.outerproduct %0, %1, %2:
- vector<4xf32>, vector<8xf32>, vector<4x8xf32>
- return %3: vector<4x8xf32>
- ```
- }];
- let extraClassDeclaration = [{
- VectorType getOperandVectorTypeLHS() {
- return lhs().getType().cast<VectorType>();
- }
- VectorType getOperandVectorTypeRHS() {
- return rhs().getType().cast<VectorType>();
- }
- VectorType getOperandVectorTypeACC() {
- return (llvm::size(acc()) == 0) ? VectorType() :
- (*acc().begin()).getType().cast<VectorType>();
- }
- VectorType getVectorType() {
- return getResult().getType().cast<VectorType>();
- }
- }];
-}
-
-// TODO(andydavis) Add transformation which decomposes ReshapeOp into an
-// optimized sequence of vector rotate/shuffle/select operations.
-def Vector_ReshapeOp :
- Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>,
- Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape,
- Variadic<Index>:$output_shape,
- I64ArrayAttr:$fixed_vector_sizes)>,
- Results<(outs AnyVector:$result)> {
- let summary = "vector reshape operation";
- let description = [{
- Reshapes its vector operand from 'input_shape' to 'output_shape',
- maintaining the fixed vector dimensions 'fixed_vector_sizes' on the
- innermost vector dimensions.
-
- The parameters 'input_shape' and 'output_shape' represent valid data shapes
- across fixed vector shapes. For example, if a vector has a valid data
- shape [6] with fixed vector size [8], then the valid data elements are
- assumed to be stored at the beginning of the vector with the remaining
- vector elements undefined.
-
- In the examples below, valid data elements are represented by an alphabetic
- character, and undefined data elements are represented by '-'.
-
- Example
-
- vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]
-
- input: [a, b, c, d, e, f]
-
- layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
-
- vector layout: [a, b, c, d, e, f, -, -]
-
- Example
-
- vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]
-
- input: [a, b, c, d, e, f, g, h, i, j]
-
- layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
-
- vector layout: [[a, b, c, d, e, f, g, h],
- [i, j, -, -, -, -, -, -]]
-
- Example
-
- vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
- [2, 3]
-
- input: [[a, b, c, d, e],
- [f, g, h, i, j],
- [k, l, m, n, o]]
-
- layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5,
- d0 mod 3, d1 mod 5)
-
- vector layout: [[[[a, b, c],
- [f, g, h]]
- [[d, e, -],
- [i, j, -]]],
- [[[k, l, m],
- [-, -, -]]
- [[n, o, -],
- [-, -, -]]]]
-
- Example
-
- %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
- : vector<3x2x4xf32> to vector<2x3x4xf32>
-
- input: [[a, b, c, d, e, f],
- [g, h, i, j, k, l],
- [m, n, o, p, q, r]]
-
- layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4)
-
-
- Input vector: [[[a, b, c, d],
- [e, f, -, -]],
- [[g, h, i, j],
- [k, l, -, -]],
- [[m, n, o, p],
- [q, r, -, -]]]
-
- Output vector: [[[a, b, c, d],
- [e, f, g, h],
- [i, -, -, -]],
- [[j, k, l, m],
- [n, o, p, q],
- [r, -, -, -]]]
- }];
-
- let extraClassDeclaration = [{
- VectorType getInputVectorType() {
- return vector().getType().cast<VectorType>();
- }
- VectorType getOutputVectorType() {
- return getResult().getType().cast<VectorType>();
- }
-
- /// Returns as integer value the number of input shape operands.
- int64_t getNumInputShapeSizes() { return input_shape().size(); }
-
- /// Returns as integer value the number of output shape operands.
- int64_t getNumOutputShapeSizes() { return output_shape().size(); }
-
- void getFixedVectorSizes(SmallVectorImpl<int64_t> &results);
-
- static StringRef getFixedVectorSizesAttrName() {
- return "fixed_vector_sizes";
- }
- static StringRef getInputShapeAttrName() { return "input_shape"; }
- static StringRef getOutputShapeAttrName() { return "output_shape"; }
- }];
-
- let assemblyFormat = [{
- $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,`
- $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result)
- }];
-}
-
-def Vector_StridedSliceOp :
- Vector_Op<"strided_slice", [NoSideEffect,
- PredOpTrait<"operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>]>,
- Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets,
- I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
- Results<(outs AnyVector)> {
- let summary = "strided_slice operation";
- let description = [{
- Takes an n-D vector, a k-sized `offsets` integer array attribute, a
- k-sized `sizes` integer array attribute, and a k-sized `strides` integer
- array attribute, and extracts the n-D subvector at the proper offset.
-
- At the moment strides must contain only 1s.
- // TODO(ntv) support non-1 strides.
-
- Returns an n-D vector where the first k dimensions match the `sizes`
- attribute. The returned subvector contains the elements starting at offset
- `offsets` and ending at `offsets + sizes`.
-
- Examples:
- ```
- %1 = vector.strided_slice %0
- {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
- vector<4x8x16xf32> to vector<2x4x16xf32>
- ```
-
- // TODO(ntv) Evolve to a range form syntax similar to:
- %1 = vector.strided_slice %0[0:2:1][2:4:1]
- vector<4x8x16xf32> to vector<2x4x16xf32>
- }];
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value source, " #
- "ArrayRef<int64_t> offsets, ArrayRef<int64_t> sizes, " #
- "ArrayRef<int64_t> strides">];
- let extraClassDeclaration = [{
- static StringRef getOffsetsAttrName() { return "offsets"; }
- static StringRef getSizesAttrName() { return "sizes"; }
- static StringRef getStridesAttrName() { return "strides"; }
- VectorType getVectorType(){ return vector().getType().cast<VectorType>(); }
- void getOffsets(SmallVectorImpl<int64_t> &results);
- }];
- let hasCanonicalizer = 1;
- let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
-}
-
-def Vector_TransferReadOp :
- Vector_Op<"transfer_read">,
- Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices,
- AffineMapAttr:$permutation_map, AnyType:$padding)>,
- Results<(outs AnyVector:$vector)> {
-
- let summary = "Reads a supervector from memory into an SSA vector value.";
-
- let description = [{
- The `vector.transfer_read` op performs a blocking read from a slice within
- a [MemRef](../LangRef.md#memref-type) supplied as its first operand
- into a [vector](../LangRef.md#vector-type) of the same base elemental type.
-
- A memref operand with vector element type must have its vector element
- type match a suffix (shape and element type) of the vector (e.g.
- memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).
-
- The slice is further defined by a full-rank index within the MemRef,
- supplied as the operands `2 .. 1 + rank(memref)`. The permutation_map
- [attribute](../LangRef.md#attributes) is an
- [affine-map](Affine.md#affine-maps) which specifies the transposition on the
- slice to match the vector shape. The size of the slice is specified by the
- size of the vector, given as the return type. An `ssa-value` of the same
- elemental type as the MemRef is provided as the last operand to specify
- padding in the case of out-of-bounds accesses. This operation is called
- 'read', as opposed to 'load', because the super-vector granularity is
- generally not representable with a single hardware register.
- A `vector.transfer_read` is thus a mid-level
- abstraction that supports super-vectorization with non-effecting padding for
- full-tile-only code.
-
- More precisely, let's dive deeper into the permutation_map for the following
- MLIR:
-
- ```mlir
- vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
- { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
- memref<?x?x?x?xf32>, vector<3x4x5xf32>
- ```
-
- This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
- %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
- is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`
-
- That slice needs to be read into a `vector<3x4x5xf32>`. Since the
- permutation map is not full rank, there must be a broadcast along vector
- dimension `1`.
-
- A notional lowering of vector.transfer_read could generate code resembling:
-
- ```mlir
- // %expr1, %expr2, %expr3, %expr4 defined before this point
- %tmp = alloc() : vector<3x4x5xf32>
- %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
- for %i = 0 to 3 {
- affine.for %j = 0 to 4 {
- affine.for %k = 0 to 5 {
- %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
- memref<?x?x?x?xf32>
- store %a, %tmp[%i, %j, %k] : vector<3x4x5xf32>
- }}}
- %c0 = constant 0 : index
- %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
- ```
-
- On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
- the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are
- actually transferred between `%A` and `%tmp`.
-
- Alternatively, if a notional vector broadcast operation were available, the
- lowered code would resemble:
-
- ```mlir
- // %expr1, %expr2, %expr3, %expr4 defined before this point
- %tmp = alloc() : vector<3x4x5xf32>
- %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
- for %i = 0 to 3 {
- affine.for %k = 0 to 5 {
- %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
- memref<?x?x?x?xf32>
- store %a, %tmp[%i, 0, %k] : vector<3x4x5xf32>
- }}
- %c0 = constant 0 : index
- %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
- %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
- ```
-
- where `broadcast` broadcasts from element 0 to all others along the
- specified dimension. This time, the temporary storage footprint is `3 * 5`
- values which is the same amount of data as the `3 * 5` values transferred.
- An additional `1` broadcast is required. On a GPU this broadcast could be
- implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.
-
- Syntax
- ```
- operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
- `{` attribute-entry `} :` memref-type `,` vector-type
- ```
-
- Examples:
-
- ```mlir
- // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
- // and pad with %f0 to handle the boundary case:
- %f0 = constant 0.0 : f32
- for %i0 = 0 to %0 {
- affine.for %i1 = 0 to %1 step 256 {
- affine.for %i2 = 0 to %2 step 32 {
- %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
- {permutation_map: (d0, d1, d2) -> (d2, d1)} :
- memref<?x?x?xf32>, vector<32x256xf32>
- }}}
-
- // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
- // vector<128xf32>. The underlying implementation will require a 1-D vector
- // broadcast:
- for %i0 = 0 to %0 {
- affine.for %i1 = 0 to %1 {
- %3 = vector.transfer_read %A[%i0, %i1]
- {permutation_map: (d0, d1) -> (0)} :
- memref<?x?xf32>, vector<128xf32>
- }
- }
-
- // Read from a memref with vector element type.
- %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
- {permutation_map = (d0, d1)->(d0, d1)}
- : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
- ```
- }];
-
- let extraClassDeclaration = [{
- MemRefType getMemRefType() {
- return memref().getType().cast<MemRefType>();
- }
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- }];
-}
-
-def Vector_TransferWriteOp :
- Vector_Op<"transfer_write">,
- Arguments<(ins AnyVector:$vector, AnyMemRef:$memref,
- Variadic<Index>:$indices,
- AffineMapAttr:$permutation_map)> {
-
- let summary = "The vector.transfer_write op writes a supervector to memory.";
-
- let description = [{
- The `vector.transfer_write` performs a blocking write from a
- [vector](../LangRef.md#vector-type), supplied as its first operand, into a
- slice within a [MemRef](../LangRef.md#memref-type) of the same base
- elemental type, supplied as its second operand.
-
- A vector memref operand must have its vector element type match a suffix
- (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>,
- vector<1x1x4x3xf32>).
-
- The slice is further defined by a full-rank index within the MemRef,
- supplied as the operands `3 .. 2 + rank(memref)`.
- The permutation_map [attribute](../LangRef.md#attributes) is an
- [affine-map](Affine.md#affine-maps) which specifies the transposition on the
- slice to match the vector shape. The size of the slice is specified by the
- size of the vector. This operation is called 'write', as opposed to
- 'store', because the super-vector granularity is generally not representable
- with a single hardware register. A `vector.transfer_write` is thus a
- mid-level abstraction that supports super-vectorization with non-effecting
- padding for full-tile-only code. It is the responsibility of
- `vector.transfer_write`'s implementation to ensure the memory writes are
- valid. Different lowerings may be pertinent depending on the hardware
- support.
-
- Syntax:
-
- ```
- operation ::= `vector.transfer_write` ssa-use-list
- `{` attribute-entry `} :` vector-type `,` memref-type
- ```
-
- Examples:
-
- ```mlir
- // write vector<16x32x64xf32> into the slice
- // `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
- for %i0 = 0 to %0 {
- affine.for %i1 = 0 to %1 step 32 {
- affine.for %i2 = 0 to %2 step 64 {
- affine.for %i3 = 0 to %3 step 16 {
- %val = `ssa-value` : vector<16x32x64xf32>
- vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
- {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
- vector<16x32x64xf32>, memref<?x?x?x?xf32>
- }}}}
-
- // write to a memref with vector element type.
- vector.transfer_write %4, %arg1[%c3, %c3]
- {permutation_map = (d0, d1)->(d0, d1)}
- : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
- ```
- }];
-
- let extraClassDeclaration = [{
- VectorType getVectorType() {
- return vector().getType().cast<VectorType>();
- }
- MemRefType getMemRefType() {
- return memref().getType().cast<MemRefType>();
- }
- }];
- let assemblyFormat = [{
- $vector `,` $memref `[` $indices `]` attr-dict `:` type($vector) `,`
- type($memref)
- }];
-}
-
-def Vector_ShapeCastOp :
- Vector_Op<"shape_cast", [NoSideEffect]>,
- Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>,
- Results<(outs AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$result)> {
- let summary = "shape_cast casts between vector shapes";
- let description = [{
- The shape_cast operation casts between an n-D source vector shape and
- a k-D result vector shape (the element type remains the same).
-
- If reducing rank (n > k), result dimension sizes must be a product
- of contiguous source dimension sizes.
- If expanding rank (n < k), source dimensions must factor into a
- contiguous sequence of destination dimension sizes.
- Each source dim is expanded (or contiguous sequence of source dims combined)
- in source dimension list order (i.e. 0 <= i < n), to produce a contiguous
- sequence of result dims (or a single result dim), in result dimension list
- order (i.e. 0 <= j < k). The product of all source dimension sizes and all
- result dimension sizes must match.
-
- If the source/result types are a tuple of vectors, the casting operation
- described above is applied to each source/result tuple element pair.
-
- It is currently assumed that this operation does not require moving data,
- and that it will be folded away before lowering vector operations.
-
- There is an exception to the folding expectation when targeting
- llvm.intr.matrix operations: a type conversion back and forth from a
- 2-D MLIR vector to a 1-D flattened LLVM vector is needed, so lowering
- shape_cast to LLVM is supported in that particular case, for now.
-
- Examples:
-
- ```mlir
- // Example casting to a lower vector rank.
- %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>
-
- // Example casting to a higher vector rank.
- %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>
-
- // Example casting a tuple of vectors of same rank, where tuple elements
- // may have different shapes.
- %5 = vector.shape_cast %4 : tuple<vector<3x4x2xf32>, vector<3x3x2xf32>> to
- tuple<vector<12x2xf32>, vector<9x2xf32>>
- ```
- }];
- let extraClassDeclaration = [{
- VectorType getSourceVectorType() {
- return source().getType().cast<VectorType>();
- }
- VectorType getResultVectorType() {
- return getResult().getType().cast<VectorType>();
- }
- }];
- let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
-}
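The dimension grouping behind the casts above works out as follows:

```mlir
// Rank-reducing: 5x1x4x3 -> 20x3, since (5*1*4) = 20 and 3 carries over.
// Rank-expanding: 10x12x8 -> 5x2x3x4x8, since 10 = (5*2), 12 = (3*4),
// and 8 carries over.
```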
-
-def Vector_TypeCastOp :
- Vector_Op<"type_cast", [NoSideEffect]>,
- Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
- Results<(outs AnyMemRef)> {
- let summary = "type_cast op converts a scalar memref to a vector memref";
- let description = [{
- Performs a conversion from a memref with a scalar element type to a memref
- with a *single* vector element, copying the shape of the memref to the
- vector. This is the minimal viable operation required to make
- super-vectorization operational. It can be seen as a special case of the
- `view` operation, but scoped to the super-vectorization context.
-
- Syntax:
-
- ```
- operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
- ```
-
- Example:
-
- ```mlir
- %A = alloc() : memref<5x4x3xf32>
- %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
- ```
- }];
-
- let builders = [OpBuilder<
- "Builder *builder, OperationState &result, Value source">];
-
- let parser = [{
- return impl::parseCastOp(parser, result);
- }];
-
- let extraClassDeclaration = [{
- MemRefType getMemRefType() {
- return memref().getType().cast<MemRefType>();
- }
- MemRefType getResultMemRefType() {
- return getResult().getType().cast<MemRefType>();
- }
- }];
-}
-
-def Vector_ConstantMaskOp :
- Vector_Op<"constant_mask", [NoSideEffect]>,
- Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>,
- Results<(outs VectorOf<[I1]>)> {
- let summary = "creates a constant vector mask";
- let description = [{
- Creates and returns a vector mask where elements of the result vector
- are set to '0' or '1', based on whether the element indices are contained
- within a hyper-rectangular region specified by the 'mask_dim_sizes'
- array attribute argument. Each element of the 'mask_dim_sizes' array
- specifies an exclusive upper bound [0, mask-dim-size-element-value)
- for a unique dimension in the vector result. The conjunction of the ranges
- defines a hyper-rectangular region within which element values are set to 1
- (otherwise element values are set to 0).
-
- Example: create a constant vector mask of size 4x3xi1 where elements in
- range 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
-
- %1 = vector.constant_mask [3, 2] : vector<4x3xi1>
-
- print %1
- columns
- 0 1 2
- |------------
- 0 | 1 1 0
- rows 1 | 1 1 0
- 2 | 1 1 0
- 3 | 0 0 0
- }];
-
- let extraClassDeclaration = [{
- static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; }
- }];
- let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
-}
-
-def Vector_CreateMaskOp :
- Vector_Op<"create_mask", [NoSideEffect]>,
- Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> {
- let summary = "creates a vector mask";
- let description = [{
- Creates and returns a vector mask where elements of the result vector
- are set to '0' or '1', based on whether the element indices are contained
- within a hyper-rectangular region specified by the operands. Specifically,
- each operand specifies a range [0, operand-value) for a unique dimension in
- the vector result. The conjunction of the operand ranges defines a
- hyper-rectangular region within which element values are set to 1
- (otherwise element values are set to 0).
-
- Example: create a vector mask of size 4x3xi1 where elements in range
- 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
-
- %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
-
- print %1
- columns
- 0 1 2
- |------------
- 0 | 1 1 0
- rows 1 | 1 1 0
- 2 | 1 1 0
- 3 | 0 0 0
- }];
-
- let hasCanonicalizer = 1;
- let assemblyFormat = "$operands attr-dict `:` type(results)";
-}
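The mask bounds are ordinary index values; a minimal self-contained sketch of the example above, with the bounds materialized as constants:

```mlir
%c2 = constant 2 : index
%c3 = constant 3 : index
%1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
```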
-
-def Vector_TupleOp :
- Vector_Op<"tuple", [NoSideEffect]>,
- Arguments<(ins Variadic<AnyVector>:$vectors)>,
- Results<(outs TupleOf<[AnyVector]>)> {
- let summary = "make tuple of vectors operation";
- let description = [{
- Returns a tuple of its operands 'vectors'.
-
- Note that this operation is used during the vector op unrolling
- transformation and should be removed before lowering to lower-level
- dialects.
-
-
- Examples:
- ```
- %0 = vector.transfer_read ... : vector<2x2xf32>
- %1 = vector.transfer_read ... : vector<2x1xf32>
- %2 = vector.transfer_read ... : vector<2x2xf32>
- %3 = vector.transfer_read ... : vector<2x1xf32>
-
- %4 = vector.tuple %0, %1, %2, %3
- : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
-
- ```
- }];
-
- let extraClassDeclaration = [{
- TupleType getResultTupleType() {
- return getResult().getType().cast<TupleType>();
- }
- }];
-}
-
-def Vector_TupleGetOp :
- Vector_Op<"tuple_get", [NoSideEffect]>,
- Arguments<(ins TupleOf<[AnyVector]>:$vectors, APIntAttr:$index)>,
- Results<(outs AnyVector)> {
- let summary = "vector tuple get operation";
- let description = [{
- Returns the tuple element of 'vectors' at 'index'.
-
- Note that this operation is used during the vector op unrolling
- transformation and should be removed before lowering to lower-level
- dialects.
-
- Examples:
- ```
- %4 = vector.tuple %0, %1, %2, %3
- : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
-
- %5 = vector.tuple_get %4, 1
- : tuple<vector<2x2xf32>, vector<2x1xf32>,
- vector<2x2xf32>, vector<2x1xf32>>
- ```
- }];
-
- let extraClassDeclaration = [{
- VectorType getResultVectorType() {
- return getResult().getType().cast<VectorType>();
- }
- int64_t getIndex() {
- return getAttrOfType<IntegerAttr>("index").getValue().getSExtValue();
- }
- static StringRef getIndexAttrName() { return "index"; }
- }];
- let hasFolder = 1;
-}
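In the example above, `%5` selects the tuple element at index 1 and therefore has type vector<2x1xf32>.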
-
-def Vector_PrintOp :
- Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> {
- let summary = "print operation (for testing and debugging)";
- let description = [{
- Prints the source vector (or scalar) to stdout in human readable
- format (for testing and debugging). No return value.
-
- Examples:
- ```
- %0 = constant 0.0 : f32
- %1 = vector.broadcast %0 : f32 to vector<4xf32>
- vector.print %1 : vector<4xf32>
- ```
-
- When lowered to LLVM, the vector print is unrolled into elementary
- printing method calls that at runtime will yield
-
- ( 0.0, 0.0, 0.0, 0.0 )
-
- on stdout when linked with a small runtime support library, which only
- needs to provide a few printing methods (a single value for all data
- types, opening/closing bracket, comma, newline).
- }];
- let verifier = ?;
- let extraClassDeclaration = [{
- Type getPrintType() {
- return source().getType();
- }
- }];
- let assemblyFormat = "$source attr-dict `:` type($source)";
-}
-
-//===----------------------------------------------------------------------===//
-// Ops used for supporting progressive lowering and conversion type changes.
-//===----------------------------------------------------------------------===//
-
-/// Vector dialect matrix multiplication op that operates on flattened 1-D
-/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
-/// This may seem redundant with vector.contract but it serves the purposes of
-/// more progressive lowering and localized type conversion on the path:
-/// `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
-def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
- PredOpTrait<"lhs operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 0>>,
- PredOpTrait<"rhs operand and result have same element type",
- TCresVTEtIsSameAsOpBase<0, 1>>]>,
- Arguments<(
- // TODO(ntv, fhahn): tighten vector element types that make sense.
- ins VectorOfRankAndType<[1],
- [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$lhs,
- VectorOfRankAndType<[1],
- [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$rhs,
- I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
- Results<(
- outs VectorOfRankAndType<[1],
- [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)>
-{
- let summary = "Vector matrix multiplication op that operates on flattened 1-D"
- " MLIR vectors";
- let description = [{
- This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
- purposes of more progressive lowering and localized type conversion.
-
- The `vector.matrix_multiply` op treats `lhs` as a matrix with <lhs_rows>
- rows and <lhs_columns> columns, `rhs` as a matrix with <lhs_columns> rows
- and <rhs_columns> columns, and multiplies them. The result matrix is
- returned embedded in the result vector.
-
- Example:
-
- ```
- %C = vector.matrix_multiply %A, %B
- { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
- (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
- ```
- }];
- let builders = [
- OpBuilder<"Builder *builder, OperationState &result, Value lhs, Value rhs, "
- "unsigned lhsRows, unsigned lhsColumns, unsigned rhsColumns",
- [{
- result.addOperands({lhs, rhs});
- result.addAttribute("lhs_rows", builder->getI32IntegerAttr(lhsRows));
- result.addAttribute("lhs_columns", builder->getI32IntegerAttr(lhsColumns));
- result.addAttribute("rhs_columns", builder->getI32IntegerAttr(rhsColumns));
- result.addTypes(VectorType::get(lhsRows * rhsColumns,
- lhs.getType().cast<VectorType>().getElementType()));
- }]>,
- ];
- let verifier = ?;
- let assemblyFormat = "$lhs `,` $rhs attr-dict "
- "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
-}
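As a shape check for the example: `lhs` is a 4x16 matrix flattened into vector<64xf64>, `rhs` is a 16x3 matrix flattened into vector<48xf64>, and the 4x3 result comes back flattened as vector<12xf64>.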
-
-#endif // VECTOR_OPS
diff --git a/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp b/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
deleted file mode 100644
index e853c76d0dba..000000000000
--- a/mlir/lib/Dialect/VectorOps/VectorTransforms.cpp
+++ /dev/null
@@ -1,1349 +0,0 @@
-//===- VectorTransforms.cpp - Conversion within the Vector dialect -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements target-independent rewrites as 1->N patterns.
-//
-//===----------------------------------------------------------------------===//
-
-#include <type_traits>
-
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
-#include "mlir/Dialect/VectorOps/VectorOps.h"
-#include "mlir/Dialect/VectorOps/VectorTransforms.h"
-#include "mlir/Dialect/VectorOps/VectorUtils.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/Function.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Module.h"
-#include "mlir/IR/OperationSupport.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Support/Functional.h"
-#include "mlir/Support/STLExtras.h"
-
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "vector-to-vector"
-
-using namespace mlir;
-using llvm::dbgs;
-using mlir::functional::zipMap;
-
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<bool> lowerToLLVMMatrixIntrinsics(
- "vector-lower-matrix-intrinsics",
- llvm::cl::desc("Lower vector.contract to llvm.intr.matrix.multiply"),
- llvm::cl::init(false), llvm::cl::cat(clOptionsCategory));
-
-/// Given a shape with sizes greater than 0 along all dimensions,
-/// returns the distance, in number of elements, between a slice in a dimension
-/// and the next slice in the same dimension.
-/// e.g. shape[3, 4, 5] -> linearization_basis[20, 5, 1]
-static SmallVector<int64_t, 8> computeStrides(ArrayRef<int64_t> shape) {
- if (shape.empty())
- return {};
- SmallVector<int64_t, 8> tmp;
- tmp.reserve(shape.size());
- int64_t running = 1;
- for (auto size : llvm::reverse(shape)) {
- assert(size > 0 && "size must be positive");
- tmp.push_back(running);
- running *= size;
- }
- return SmallVector<int64_t, 8>(tmp.rbegin(), tmp.rend());
-}
-
-static int64_t computeMaxLinearIndex(ArrayRef<int64_t> basis) {
- if (basis.empty())
- return 0;
- int64_t res = 1;
- for (auto b : basis)
- res *= b;
- return res;
-}
-
-/// Computes and returns the linearized index of 'offsets' w.r.t. 'basis'.
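/// For example, offsets [2, 1, 3] with basis [20, 5, 1] linearize to
/// 2*20 + 1*5 + 3*1 = 48.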
-static int64_t linearize(ArrayRef<int64_t> offsets, ArrayRef<int64_t> basis) {
- assert(offsets.size() == basis.size());
- int64_t linearIndex = 0;
- for (unsigned idx = 0, e = basis.size(); idx < e; ++idx)
- linearIndex += offsets[idx] * basis[idx];
- return linearIndex;
-}
-
-// Clones `op` into a new operation that takes `operands` and returns
-// `resultTypes`.
-static Operation *cloneOpWithOperandsAndTypes(PatternRewriter &builder,
- Location loc, Operation *op,
- ArrayRef<Value> operands,
- ArrayRef<Type> resultTypes) {
- OperationState res(loc, op->getName().getStringRef(), operands, resultTypes,
- op->getAttrs());
- return builder.createOperation(res);
-}
-
-// Populates 'resultElements[indexMap[i]]' with elements from 'inputElements[i]'
-// for each index 'i' in inputElements with a valid mapping in 'indexMap'.
-static void getMappedElements(const DenseMap<int64_t, int64_t> &indexMap,
- ArrayRef<int64_t> inputElements,
- SmallVectorImpl<int64_t> &resultElements) {
- assert(indexMap.size() == resultElements.size());
- assert(inputElements.size() >= resultElements.size());
- for (unsigned i = 0, e = inputElements.size(); i < e; ++i) {
- auto it = indexMap.find(i);
- if (it != indexMap.end())
- resultElements[it->second] = inputElements[i];
- }
-}
-
-// Returns a tuple type with vector element types for each resulting slice
-// of 'vectorType' unrolled by 'sizes' and 'strides'.
-// TODO(andydavis) Move this to a utility function and share it with
-// Extract/InsertSlicesOp verification.
-static TupleType generateExtractSlicesOpResultType(VectorType vectorType,
- ArrayRef<int64_t> sizes,
- ArrayRef<int64_t> strides,
- PatternRewriter &builder) {
- assert(llvm::all_of(strides, [](int64_t s) { return s == 1; }));
- assert(static_cast<int64_t>(sizes.size()) == vectorType.getRank());
- assert(static_cast<int64_t>(strides.size()) == vectorType.getRank());
-
- // Compute shape ratio of 'shape' and 'sizes'.
- auto shape = vectorType.getShape();
- auto maybeDimSliceCounts = shapeRatio(shape, sizes);
- assert(maybeDimSliceCounts.hasValue());
- auto sliceDimCounts = *maybeDimSliceCounts;
-
- // Compute strides w.r.t number of slices in each dimension.
- auto sliceStrides = computeStrides(sliceDimCounts);
- int64_t sliceCount = computeMaxLinearIndex(sliceDimCounts);
- SmallVector<Type, 4> vectorTypes(sliceCount);
- for (unsigned i = 0; i < sliceCount; ++i) {
- auto vectorOffsets = delinearize(sliceStrides, i);
- auto elementOffsets =
- computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
- auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
- // Create Vector type and add to 'vectorTypes[i]'.
- vectorTypes[i] = VectorType::get(sliceSizes, vectorType.getElementType());
- }
- return TupleType::get(vectorTypes, builder.getContext());
-}
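// A self-contained sketch of the slice enumeration above for a concrete,
// evenly divisible case (the shapeRatio assert requires divisibility here);
// all names are hypothetical.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> shape = {4, 6}; // Source vector<4x6xf32>.
  std::vector<int64_t> sizes = {2, 3}; // Per-slice target shape.
  // Slices per dimension: {4/2, 6/3} = {2, 2}, i.e. 4 slices in total.
  int64_t count0 = shape[0] / sizes[0], count1 = shape[1] / sizes[1];
  for (int64_t i = 0; i < count0; ++i)
    for (int64_t j = 0; j < count1; ++j)
      // Element offsets of slice (i, j) within the source vector.
      std::cout << "slice at [" << i * sizes[0] << ", " << j * sizes[1]
                << "], shape 2x3\n";
  // The resulting type is tuple<vector<2x3xf32>, ..., vector<2x3xf32>> (x4).
}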
-
-// UnrolledVectorState aggregates per-operand/result vector state required for
-// unrolling.
-struct UnrolledVectorState {
- SmallVector<int64_t, 4> unrolledShape;
- SmallVector<int64_t, 4> unrollFactors;
- SmallVector<int64_t, 8> basis;
- int64_t numInstances;
- Value slicesTuple;
-};
-
-// Populates 'state' with unrolled shape, unroll factors, basis and
-// num unrolled instances for 'vectorType'.
-static void initUnrolledVectorState(VectorType vectorType, Value initValue,
- const DenseMap<int64_t, int64_t> &indexMap,
- ArrayRef<int64_t> targetShape,
- UnrolledVectorState &state,
- PatternRewriter &builder) {
- // Compute unrolled shape of 'vectorType'.
- state.unrolledShape.resize(vectorType.getRank());
- getMappedElements(indexMap, targetShape, state.unrolledShape);
- // Compute unroll factors for unrolled shape.
- auto maybeUnrollFactors =
- shapeRatio(vectorType.getShape(), state.unrolledShape);
- assert(maybeUnrollFactors.hasValue());
- state.unrollFactors = *maybeUnrollFactors;
- // Compute 'basis' and 'numInstances' based on 'state.unrollFactors'.
- state.basis = computeStrides(state.unrollFactors);
- state.numInstances = computeMaxLinearIndex(state.unrollFactors);
- state.slicesTuple = nullptr;
- if (initValue != nullptr) {
- // Create ExtractSlicesOp.
- SmallVector<int64_t, 4> sizes(state.unrolledShape);
- SmallVector<int64_t, 4> strides(state.unrollFactors.size(), 1);
- auto tupleType =
- generateExtractSlicesOpResultType(vectorType, sizes, strides, builder);
- state.slicesTuple = builder.create<vector::ExtractSlicesOp>(
- initValue.getLoc(), tupleType, initValue, sizes, strides);
- }
-}
-
-// Computes and returns the linear index of the unrolled vector at
-// 'vectorOffsets' within the vector represented by 'state'.
-static int64_t
-getUnrolledVectorLinearIndex(UnrolledVectorState &state,
- ArrayRef<int64_t> vectorOffsets,
- DenseMap<int64_t, int64_t> &indexMap) {
- // Compute vector offsets.
- SmallVector<int64_t, 4> sliceOffsets(state.unrolledShape.size());
- getMappedElements(indexMap, vectorOffsets, sliceOffsets);
- // Compute and return linear index of 'sliceOffsets' w.r.t 'state.basis'.
- return linearize(sliceOffsets, state.basis);
-}
-
-// Returns an unrolled vector at 'vectorOffsets' within the vector
-// represented by 'state'. The vector is created from a slice of 'initValue'
-// if not present in 'cache'.
-static Value getOrCreateUnrolledVectorSlice(
- Location loc, UnrolledVectorState &state, ArrayRef<int64_t> vectorOffsets,
- ArrayRef<int64_t> offsets, DenseMap<int64_t, int64_t> &indexMap,
- Value initValue, SmallVectorImpl<Value> &cache, PatternRewriter &builder) {
- // Compute slice offsets.
- SmallVector<int64_t, 4> sliceOffsets(state.unrolledShape.size());
- getMappedElements(indexMap, offsets, sliceOffsets);
- // TODO(b/144845578) Support non-1 strides.
- SmallVector<int64_t, 4> sliceStrides(state.unrolledShape.size(), 1);
- // Compute linear index of 'sliceOffsets' w.r.t 'state.basis'.
- int64_t sliceLinearIndex =
- getUnrolledVectorLinearIndex(state, vectorOffsets, indexMap);
- assert(sliceLinearIndex < static_cast<int64_t>(cache.size()));
- auto valueSlice = cache[sliceLinearIndex];
- if (valueSlice == nullptr) {
- // Return tuple element at 'sliceLinearIndex'.
- auto tupleIndex = builder.getI64IntegerAttr(sliceLinearIndex);
- auto initValueType = initValue.getType().cast<VectorType>();
- auto vectorType =
- VectorType::get(state.unrolledShape, initValueType.getElementType());
- // Initialize 'cache' with slice from 'initValue'.
- valueSlice = builder.create<vector::TupleGetOp>(
- loc, vectorType, state.slicesTuple, tupleIndex);
- // Store value back to 'cache'.
- cache[sliceLinearIndex] = valueSlice;
- }
- return valueSlice;
-}
-
-// VectorState aggregates per-operand/result vector state required for
-// creating slices of vector operands, and clones of the operation being
-// unrolled.
-struct VectorState {
- // The type of this vector.
- VectorType type;
- // Map from iteration space index to vector dimension index.
- DenseMap<int64_t, int64_t> indexMap;
- // Index of this value in operation's operand list (-1 if not an operand).
- int64_t operandIndex = -1;
- // Accumulator iterator flag.
- bool isAcc = false;
-};
-
-//
-// unrollSingleResultStructuredOp
-//
-// Returns a value representing the result of structured operation 'op'
-// with iteration bounds 'iterationBounds' unrolled to 'targetShape'.
-// A list of VectorState objects must be specified in 'vectors', where
-// each VectorState in the list represents a vector operand or vector result
-// (if the operation does not have an accumulator operand).
-// The VectorState at index 'resultIndex' in the list must be the state
-// associated with the operation's single result (i.e. either its accumulator
-// operand or vector result value).
-//
-// Example:
-//
-// // Before unrolling
-//
-// operand0 operand1 operand2
-// \ | /
-// -------------------- opA --------------------
-//
-// // After unrolling by 2
-//
-// operand0 operand1 operand2
-// / \ / \ / \
-// slice00 slice01 slice10 slice11 slice20 slice21
-// \ | | | / |
-// -------------------- opA0 -------------------- |
-// | | | |
-// \ | | /
-// -------------------- opA1 -------------------
-// | |
-// \ /
-// insertslice
-// |
-
-// TODO(andydavis) Add the following canonicalization/simplification patterns:
-// *) Add pattern which matches InsertStridedSlice -> StridedSlice and forwards
-// InsertStridedSlice operand to StridedSlice.
-// *) Add pattern which matches SourceOp -> StridedSlice -> UserOp which checks
-// if there are duplicate identical StridedSlice ops from SourceOp, and
-// rewrites itself to use the first duplicate. This transformation should
-// cause users of identical StridedSlice ops to reuse the same StridedSlice
-// operation, and leave the duplicate StridedSlice ops with no users
-// (removable with DCE).
-
-// TODO(andydavis) Generalize this to support structured ops beyond
-// vector ContractionOp, and merge it with 'unrollSingleResultOpMatchingType'
-static Value unrollSingleResultStructuredOp(Operation *op,
- ArrayRef<int64_t> iterationBounds,
- std::vector<VectorState> &vectors,
- unsigned resultIndex,
- ArrayRef<int64_t> targetShape,
- PatternRewriter &builder) {
- auto shapedType = op->getResult(0).getType().dyn_cast_or_null<ShapedType>();
- assert(shapedType && shapedType.hasStaticShape() &&
- "Expected a statically shaped result type");
-
- // Compute unroll factors for 'iterationBounds' based on 'targetShape'
- auto maybeUnrollFactors = shapeRatio(iterationBounds, targetShape);
- assert(maybeUnrollFactors.hasValue() &&
- "Failed to compute unroll factors for target shape");
- auto unrollFactors = *maybeUnrollFactors;
-
- // Compute unrolled vector state for each vector in 'vectors'.
- unsigned numVectors = vectors.size();
- SmallVector<UnrolledVectorState, 3> unrolledVectorState(numVectors);
- for (unsigned i = 0; i < numVectors; ++i) {
- int64_t operandIndex = vectors[i].operandIndex;
- auto operand = operandIndex >= 0 ? op->getOperand(operandIndex) : nullptr;
- initUnrolledVectorState(vectors[i].type, operand, vectors[i].indexMap,
- targetShape, unrolledVectorState[i], builder);
- }
- // Compute number of total unrolled instances.
- auto numUnrolledInstances = computeMaxLinearIndex(unrollFactors);
- auto sliceStrides = computeStrides(unrollFactors);
-
- auto &resultValueState = unrolledVectorState[resultIndex];
- auto unrolledResultType = VectorType::get(resultValueState.unrolledShape,
- shapedType.getElementType());
-
- // Initialize caches for intermediate vector results.
- std::vector<SmallVector<Value, 4>> caches(numVectors);
- for (unsigned i = 0; i < numVectors; ++i)
- caches[i].resize(unrolledVectorState[i].numInstances);
-
- // Unroll 'numUnrolledInstances' of 'op', storing results in 'caches'.
- for (unsigned i = 0; i < numUnrolledInstances; ++i) {
- auto vectorOffsets = delinearize(sliceStrides, i);
- auto elementOffsets =
- computeElementOffsetsFromVectorSliceOffsets(targetShape, vectorOffsets);
- // Get cached slice (or create slice) for each operand at 'offsets'.
- SmallVector<Value, 3> operands;
- operands.resize(op->getNumOperands());
- for (unsigned j = 0; j < numVectors; ++j) {
- int64_t operandIndex = vectors[j].operandIndex;
- if (operandIndex < 0)
- continue; // Result, not an operand.
- auto operand = op->getOperand(operandIndex);
- operands[operandIndex] = getOrCreateUnrolledVectorSlice(
- op->getLoc(), unrolledVectorState[j], vectorOffsets, elementOffsets,
- vectors[j].indexMap, operand, caches[j], builder);
- }
- // Create op on sliced vector arguments.
- auto resultVector =
- cloneOpWithOperandsAndTypes(builder, op->getLoc(), op, operands,
- unrolledResultType)
- ->getResult(0);
-
- // Compute linear result index.
- int64_t linearIndex = getUnrolledVectorLinearIndex(
- resultValueState, vectorOffsets, vectors[resultIndex].indexMap);
- // Update result cache at 'linearIndex'.
- caches[resultIndex][linearIndex] = resultVector;
- }
-
- // Create TupleOp of unrolled result vectors.
- SmallVector<Type, 4> vectorTupleTypes(resultValueState.numInstances);
- SmallVector<Value, 4> vectorTupleValues(resultValueState.numInstances);
- for (unsigned i = 0; i < resultValueState.numInstances; ++i) {
- vectorTupleTypes[i] = caches[resultIndex][i].getType().cast<VectorType>();
- vectorTupleValues[i] = caches[resultIndex][i];
- }
- TupleType tupleType = builder.getTupleType(vectorTupleTypes);
- Value tupleOp = builder.create<vector::TupleOp>(op->getLoc(), tupleType,
- vectorTupleValues);
-
- // Create InsertSlicesOp(Tuple(result_vectors)).
- auto resultVectorType = op->getResult(0).getType().cast<VectorType>();
- SmallVector<int64_t, 4> sizes(resultValueState.unrolledShape);
- SmallVector<int64_t, 4> strides(resultValueState.unrollFactors.size(), 1);
-
- Value insertSlicesOp = builder.create<vector::InsertSlicesOp>(
- op->getLoc(), resultVectorType, tupleOp, builder.getI64ArrayAttr(sizes),
- builder.getI64ArrayAttr(strides));
- return insertSlicesOp;
-}
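// To make the iteration structure above concrete: a standalone sketch
// (hypothetical names) that enumerates the element offsets visited when
// unrolling iteration bounds [4, 4] to target shape [2, 2], mirroring the
// computeStrides/delinearize arithmetic.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> bounds = {4, 4}, target = {2, 2};
  std::vector<int64_t> factors = {bounds[0] / target[0],
                                  bounds[1] / target[1]}; // {2, 2}
  std::vector<int64_t> strides = {factors[1], 1};         // {2, 1}
  int64_t numInstances = factors[0] * factors[1]; // 4 clones of the op.
  for (int64_t i = 0; i < numInstances; ++i) {
    int64_t off0 = (i / strides[0]) * target[0];
    int64_t off1 = (i % strides[0]) * target[1];
    // Instance i computes on the [2, 2] slice rooted at (off0, off1).
    std::cout << "instance " << i << " -> offsets (" << off0 << ", " << off1
              << ")\n";
  }
}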
-
-static void getVectorContractionOpUnrollState(
- vector::ContractionOp contractionOp, ArrayRef<int64_t> targetShape,
- SmallVectorImpl<int64_t> &iterationBounds,
- std::vector<VectorState> &vectors, unsigned &resultIndex) {
- // Get contraction op iteration bounds.
- contractionOp.getIterationBounds(iterationBounds);
- assert(iterationBounds.size() == targetShape.size());
- // Get map from iteration space index to lhs/rhs/result shape index.
- std::vector<DenseMap<int64_t, int64_t>> iterationIndexMapList;
- contractionOp.getIterationIndexMap(iterationIndexMapList);
- unsigned numIterators = iterationIndexMapList.size();
- vectors.resize(numIterators);
- unsigned accOperandIndex = vector::ContractionOp::getAccOperandIndex();
- for (unsigned i = 0; i < numIterators; ++i) {
- vectors[i].type = contractionOp.getOperand(i).getType().cast<VectorType>();
- vectors[i].indexMap = iterationIndexMapList[i];
- vectors[i].operandIndex = i;
- vectors[i].isAcc = (i == accOperandIndex);
- }
-
- if (llvm::size(contractionOp.masks()) == 2) {
- // Add vectors for lhs/rhs vector mask arguments. Masks have the
- // same vector shape as the lhs/rhs args, so copy their index maps.
- vectors.push_back({contractionOp.getLHSVectorMaskType(),
- vectors[0].indexMap, accOperandIndex + 1, false});
- vectors.push_back({contractionOp.getRHSVectorMaskType(),
- vectors[1].indexMap, accOperandIndex + 2, false});
- }
- // Unroll 'op' 'iterationBounds' to 'targetShape'.
- // TODO(andydavis) Use linalg style 'args_in'/'args_out' to partition
- // 'vectors' instead of 'resultIndex'.
- resultIndex = accOperandIndex;
-}
-
-static void
-getVectorElementwiseOpUnrollState(Operation *op, ArrayRef<int64_t> targetShape,
- SmallVectorImpl<int64_t> &iterationBounds,
- std::vector<VectorState> &vectors,
- unsigned &resultIndex) {
- // Verify that operation and operands all have the same vector shape.
- auto resultType = op->getResult(0).getType().dyn_cast_or_null<VectorType>();
- assert(resultType && "Expected op with vector result type");
- auto resultShape = resultType.getShape();
- // Verify that all operands have the same vector type as result.
- assert(llvm::all_of(op->getOperandTypes(),
- [=](Type type) { return type == resultType; }));
- // Populate 'iterationBounds' with 'resultShape' for elementwise operations.
- iterationBounds.assign(resultShape.begin(), resultShape.end());
-
- // Create trivial elementwise identity index map based on 'resultShape'.
- DenseMap<int64_t, int64_t> indexMap;
- indexMap.reserve(resultShape.size());
- for (unsigned i = 0; i < resultShape.size(); ++i)
- indexMap[i] = i;
-
- // Create a VectorState for each operand and for the single result.
- unsigned numVectors = op->getNumOperands() + op->getNumResults();
- vectors.resize(numVectors);
- for (unsigned i = 0; i < op->getNumOperands(); ++i)
- vectors[i] = {resultType, indexMap, i, false};
- vectors[numVectors - 1] = {resultType, indexMap, -1, false};
- resultIndex = numVectors - 1;
-}
-
-// Entry point for unrolling declarative pattern rewrites.
-SmallVector<Value, 1> mlir::vector::unrollSingleResultOpMatchingType(
- PatternRewriter &builder, Operation *op, ArrayRef<int64_t> targetShape) {
- assert(op->getNumResults() == 1 && "Expected single result operation");
-
- // Populate 'iterationBounds', 'vectors' and 'resultIndex' to unroll 'op'.
- SmallVector<int64_t, 6> iterationBounds;
- std::vector<VectorState> vectors;
- unsigned resultIndex;
-
- if (auto contractionOp = dyn_cast<vector::ContractionOp>(op)) {
- // Populate state for vector ContractionOp.
- getVectorContractionOpUnrollState(contractionOp, targetShape,
- iterationBounds, vectors, resultIndex);
- } else {
- // Populate state for vector elementwise op.
- getVectorElementwiseOpUnrollState(op, targetShape, iterationBounds, vectors,
- resultIndex);
- }
-
- // Unroll 'op' with 'iterationBounds' to 'targetShape'.
- return SmallVector<Value, 1>{unrollSingleResultStructuredOp(
- op, iterationBounds, vectors, resultIndex, targetShape, builder)};
-}
-
-/// Generates slices of 'vectorType' according to 'sizes' and 'strides', and
-/// calls 'fn' with linear index and indices for each slice.
-static void
-generateTransferOpSlices(Type memrefElementType, VectorType vectorType,
- TupleType tupleType, ArrayRef<int64_t> sizes,
- ArrayRef<int64_t> strides, ArrayRef<Value> indices,
- PatternRewriter &rewriter,
- function_ref<void(unsigned, ArrayRef<Value>)> fn) {
- // Compute strides w.r.t. slice counts in each dimension.
- auto maybeDimSliceCounts = shapeRatio(vectorType.getShape(), sizes);
- assert(maybeDimSliceCounts.hasValue());
- auto sliceDimCounts = *maybeDimSliceCounts;
- auto sliceStrides = computeStrides(sliceDimCounts);
-
- int64_t numSlices = tupleType.size();
- unsigned numSliceIndices = indices.size();
- // Compute 'indexOffset' at which to update 'indices', which is equal
- // to the memref rank (indices.size) minus the effective 'vectorRank'.
- // The effective 'vectorRank' is equal to the rank of the vector type
- // minus the rank of the memref vector element type (if it has one).
- //
- // For example:
- //
- // Given memref type 'memref<6x2x1xvector<2x4xf32>>' and vector
- // transfer_read/write ops which read/write vectors of type
- // 'vector<2x1x2x4xf32>'. The memref rank is 3, and the effective
- // vector rank is 4 - 2 = 2, and so 'indexOffset' = 3 - 2 = 1.
- //
- unsigned vectorRank = vectorType.getRank();
- if (auto memrefVectorElementType = memrefElementType.dyn_cast<VectorType>()) {
- assert(vectorRank >= memrefVectorElementType.getRank());
- vectorRank -= memrefVectorElementType.getRank();
- }
- unsigned indexOffset = numSliceIndices - vectorRank;
-
- auto *ctx = rewriter.getContext();
- for (unsigned i = 0; i < numSlices; ++i) {
- auto vectorOffsets = delinearize(sliceStrides, i);
- auto elementOffsets =
- computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
- // Compute 'sliceIndices' by adding 'sliceOffsets[i]' to 'indices[i]'.
- SmallVector<Value, 4> sliceIndices(numSliceIndices);
- for (unsigned j = 0; j < numSliceIndices; ++j) {
- if (j < indexOffset) {
- sliceIndices[j] = indices[j];
- } else {
- auto expr = getAffineDimExpr(0, ctx) +
- getAffineConstantExpr(elementOffsets[j - indexOffset], ctx);
- auto map = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, expr);
- sliceIndices[j] = rewriter.create<AffineApplyOp>(
- indices[j].getLoc(), map, ArrayRef<Value>(indices[j]));
- }
- }
- // Call 'fn' to generate slice 'i' at 'sliceIndices'.
- fn(i, sliceIndices);
- }
-}
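// The index-offset computation above, reduced to plain arithmetic for the
// worked example in the comment (names are hypothetical):

#include <cassert>

int main() {
  unsigned memrefRank = 3;              // memref<6x2x1xvector<2x4xf32>>
  unsigned transferVectorRank = 4;      // vector<2x1x2x4xf32>
  unsigned memrefVectorElementRank = 2; // element type vector<2x4xf32>
  unsigned effectiveVectorRank = transferVectorRank - memrefVectorElementRank;
  unsigned indexOffset = memrefRank - effectiveVectorRank;
  assert(effectiveVectorRank == 2 && indexOffset == 1);
  // Only memref indices 1 and 2 are advanced per slice; index 0 is copied.
}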
-
-/// Returns true if 'map' is a suffix of an identity affine map, false
-/// otherwise. Example: affine_map<(d0, d1, d2, d3) -> (d2, d3)>
-static bool isIdentitySuffix(AffineMap map) {
- if (map.getNumDims() < map.getNumResults())
- return false;
- ArrayRef<AffineExpr> results = map.getResults();
- Optional<int> lastPos;
- for (unsigned i = 0, e = map.getNumResults(); i < e; ++i) {
- auto expr = results[i].dyn_cast<AffineDimExpr>();
- if (!expr)
- return false;
- int currPos = static_cast<int>(expr.getPosition());
- if (lastPos.hasValue() && currPos != lastPos.getValue() + 1)
- return false;
- lastPos = currPos;
- }
- return true;
-}
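// A standalone sketch of the consecutive-position check at the core of the
// function above, on raw dimension positions instead of AffineExprs
// (hypothetical name; the dim-count and dim-expr checks are elided).

#include <cassert>
#include <vector>

static bool isConsecutive(const std::vector<int> &positions) {
  for (size_t i = 1; i < positions.size(); ++i)
    if (positions[i] != positions[i - 1] + 1)
      return false;
  return true;
}

int main() {
  // affine_map<(d0, d1, d2, d3) -> (d2, d3)> has result positions {2, 3}.
  assert(isConsecutive({2, 3}));
  assert(!isConsecutive({2, 1})); // Decreasing: not an identity suffix.
  assert(!isConsecutive({0, 2})); // Gap: not an identity suffix.
}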
-
-namespace {
-
-// Splits vector TransferReadOp into smaller TransferReadOps based on slicing
-// scheme of its unique ExtractSlicesOp user.
-struct SplitTransferReadOp : public OpRewritePattern<vector::TransferReadOp> {
- using OpRewritePattern<vector::TransferReadOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::TransferReadOp xferReadOp,
- PatternRewriter &rewriter) const override {
- // TODO(andydavis, ntv) Support splitting TransferReadOp with non-identity
- // permutation maps. Repurpose code from MaterializeVectors transformation.
- if (!isIdentitySuffix(xferReadOp.permutation_map()))
- return matchFailure();
- // Return unless the unique 'xferReadOp' user is an ExtractSlicesOp.
- Value xferReadResult = xferReadOp.getResult();
- auto extractSlicesOp =
- dyn_cast<vector::ExtractSlicesOp>(*xferReadResult.getUsers().begin());
- if (!xferReadResult.hasOneUse() || !extractSlicesOp)
- return matchFailure();
-
- // Get 'sizes' and 'strides' parameters from ExtractSlicesOp user.
- auto sourceVectorType = extractSlicesOp.getSourceVectorType();
- auto resultTupleType = extractSlicesOp.getResultTupleType();
- SmallVector<int64_t, 4> sizes;
- extractSlicesOp.getSizes(sizes);
- SmallVector<int64_t, 4> strides;
- extractSlicesOp.getStrides(strides);
- assert(llvm::all_of(strides, [](int64_t s) { return s == 1; }));
-
- Location loc = xferReadOp.getLoc();
- auto memrefElementType =
- xferReadOp.memref().getType().cast<MemRefType>().getElementType();
- int64_t numSlices = resultTupleType.size();
- SmallVector<Value, 4> vectorTupleValues(numSlices);
- SmallVector<Value, 4> indices(xferReadOp.indices().begin(),
- xferReadOp.indices().end());
- auto createSlice = [&](unsigned index, ArrayRef<Value> sliceIndices) {
- // Get VectorType for slice 'i'.
- auto sliceVectorType = resultTupleType.getType(index);
- // Create split TransferReadOp for 'sliceUser'.
- vectorTupleValues[index] = rewriter.create<vector::TransferReadOp>(
- loc, sliceVectorType, xferReadOp.memref(), sliceIndices,
- xferReadOp.permutation_map(), xferReadOp.padding());
- };
- generateTransferOpSlices(memrefElementType, sourceVectorType,
- resultTupleType, sizes, strides, indices, rewriter,
- createSlice);
-
- // Create tuple of split xfer read operations.
- Value tupleOp = rewriter.create<vector::TupleOp>(loc, resultTupleType,
- vectorTupleValues);
- // Replace 'xferReadOp' with a new InsertSlicesOp over the split reads.
- rewriter.replaceOpWithNewOp<vector::InsertSlicesOp>(
- xferReadOp, sourceVectorType, tupleOp, extractSlicesOp.sizes(),
- extractSlicesOp.strides());
- return matchSuccess();
- }
-};
-
-// Splits vector TransferWriteOp into smaller TransferWriteOps for each source.
-struct SplitTransferWriteOp : public OpRewritePattern<vector::TransferWriteOp> {
- using OpRewritePattern<vector::TransferWriteOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::TransferWriteOp xferWriteOp,
- PatternRewriter &rewriter) const override {
- // TODO(andydavis, ntv) Support splitting TransferWriteOp with non-identity
- // permutation maps. Repurpose code from MaterializeVectors transformation.
- if (!isIdentitySuffix(xferWriteOp.permutation_map()))
- return matchFailure();
- // Return unless the 'xferWriteOp' 'vector' operand is an 'InsertSlicesOp'.
- auto *vectorDefOp = xferWriteOp.vector().getDefiningOp();
- auto insertSlicesOp = dyn_cast_or_null<vector::InsertSlicesOp>(vectorDefOp);
- if (!insertSlicesOp)
- return matchFailure();
-
- // Get TupleOp operand of 'insertSlicesOp'.
- auto tupleOp = dyn_cast_or_null<vector::TupleOp>(
- insertSlicesOp.vectors().getDefiningOp());
- if (!tupleOp)
- return matchFailure();
-
- // Get 'sizes' and 'strides' parameters from the 'insertSlicesOp'.
- auto sourceTupleType = insertSlicesOp.getSourceTupleType();
- auto resultVectorType = insertSlicesOp.getResultVectorType();
- SmallVector<int64_t, 4> sizes;
- insertSlicesOp.getSizes(sizes);
- SmallVector<int64_t, 4> strides;
- insertSlicesOp.getStrides(strides);
-
- Location loc = xferWriteOp.getLoc();
- auto memrefElementType =
- xferWriteOp.memref().getType().cast<MemRefType>().getElementType();
- SmallVector<Value, 4> indices(xferWriteOp.indices().begin(),
- xferWriteOp.indices().end());
- auto createSlice = [&](unsigned index, ArrayRef<Value> sliceIndices) {
- // Create split TransferWriteOp for source vector 'tupleOp.operand[i]'.
- rewriter.create<vector::TransferWriteOp>(
- loc, tupleOp.getOperand(index), xferWriteOp.memref(), sliceIndices,
- xferWriteOp.permutation_map());
- };
- generateTransferOpSlices(memrefElementType, resultVectorType,
- sourceTupleType, sizes, strides, indices, rewriter,
- createSlice);
-
- // Erase old 'xferWriteOp'.
- rewriter.eraseOp(xferWriteOp);
- return matchSuccess();
- }
-};
-
-/// Decomposes ShapeCastOp on tuple-of-vectors to multiple ShapeCastOps, each
-/// on vector types.
-struct ShapeCastOpDecomposer : public OpRewritePattern<vector::ShapeCastOp> {
- using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ShapeCastOp shapeCastOp,
- PatternRewriter &rewriter) const override {
- // Check if 'shapeCastOp' has tuple source/result type.
- auto sourceTupleType =
- shapeCastOp.source().getType().dyn_cast_or_null<TupleType>();
- auto resultTupleType =
- shapeCastOp.result().getType().dyn_cast_or_null<TupleType>();
- if (!sourceTupleType || !resultTupleType)
- return matchFailure();
- assert(sourceTupleType.size() == resultTupleType.size());
-
- // Create single-vector ShapeCastOp for each source tuple element.
- Location loc = shapeCastOp.getLoc();
- SmallVector<Value, 8> resultElements;
- resultElements.reserve(resultTupleType.size());
- for (unsigned i = 0, e = sourceTupleType.size(); i < e; ++i) {
- auto sourceElement = rewriter.create<vector::TupleGetOp>(
- loc, sourceTupleType.getType(i), shapeCastOp.source(),
- rewriter.getI64IntegerAttr(i));
- resultElements.push_back(rewriter.create<vector::ShapeCastOp>(
- loc, resultTupleType.getType(i), sourceElement));
- }
-
- // Replace 'shapeCastOp' with tuple of 'resultElements'.
- rewriter.replaceOpWithNewOp<vector::TupleOp>(shapeCastOp, resultTupleType,
- resultElements);
- return matchSuccess();
- }
-};
-
-/// ShapeCastOpFolder folds cancelling ShapeCastOps away.
-//
-// Example:
-//
-// The following MLIR with cancelling ShapeCastOps:
-//
-// %0 = source : vector<5x4x2xf32>
-// %1 = shape_cast %0 : vector<5x4x2xf32> to vector<20x2xf32>
-// %2 = shape_cast %1 : vector<20x2xf32> to vector<5x4x2xf32>
-// %3 = user %2 : vector<5x4x2xf32>
-//
-// Should canonicalize to the following:
-//
-// %0 = source : vector<5x4x2xf32>
-// %1 = user %0 : vector<5x4x2xf32>
-//
-struct ShapeCastOpFolder : public OpRewritePattern<vector::ShapeCastOp> {
- using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ShapeCastOp shapeCastOp,
- PatternRewriter &rewriter) const override {
- // Check if 'shapeCastOp' has vector source/result type.
- auto sourceVectorType =
- shapeCastOp.source().getType().dyn_cast_or_null<VectorType>();
- auto resultVectorType =
- shapeCastOp.result().getType().dyn_cast_or_null<VectorType>();
- if (!sourceVectorType || !resultVectorType)
- return matchFailure();
-
- // Check if shape cast op source operand is also a shape cast op.
- auto sourceShapeCastOp = dyn_cast_or_null<vector::ShapeCastOp>(
- shapeCastOp.source().getDefiningOp());
- if (!sourceShapeCastOp)
- return matchFailure();
- auto operandSourceVectorType =
- sourceShapeCastOp.source().getType().cast<VectorType>();
- auto operandResultVectorType =
- sourceShapeCastOp.result().getType().cast<VectorType>();
-
- // Check if shape cast operations invert each other.
- if (operandSourceVectorType != resultVectorType ||
- operandResultVectorType != sourceVectorType)
- return matchFailure();
-
- rewriter.replaceOp(shapeCastOp, sourceShapeCastOp.source());
- return matchSuccess();
- }
-};
-
-// Pattern rewrite which forwards tuple elements to their users.
-// User(TupleGetOp(ExtractSlicesOp(InsertSlicesOp(TupleOp(Producer)))))
-// -> User(Producer)
-struct TupleGetFolderOp : public OpRewritePattern<vector::TupleGetOp> {
- using OpRewritePattern<vector::TupleGetOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::TupleGetOp tupleGetOp,
- PatternRewriter &rewriter) const override {
- // Return if 'tupleGetOp.vectors' arg was not defined by ExtractSlicesOp.
- auto extractSlicesOp = dyn_cast_or_null<vector::ExtractSlicesOp>(
- tupleGetOp.vectors().getDefiningOp());
- if (!extractSlicesOp)
- return matchFailure();
-
- // Return if 'extractSlicesOp.vector' arg was not defined by InsertSlicesOp.
- auto insertSlicesOp = dyn_cast_or_null<vector::InsertSlicesOp>(
- extractSlicesOp.vector().getDefiningOp());
- if (!insertSlicesOp)
- return matchFailure();
-
- // Return if 'insertSlicesOp.vectors' arg was not defined by TupleOp.
- auto tupleOp = dyn_cast_or_null<vector::TupleOp>(
- insertSlicesOp.vectors().getDefiningOp());
- if (!tupleOp)
- return matchFailure();
-
- // Forward Value from 'tupleOp' at 'tupleGetOp.index'.
- Value tupleValue = tupleOp.getOperand(tupleGetOp.getIndex());
- rewriter.replaceOp(tupleGetOp, tupleValue);
- return matchSuccess();
- }
-};
-
-/// Progressive lowering of ExtractSlicesOp to tuple of StridedSliceOp.
-/// One:
-/// %x = vector.extract_slices %0
-/// is replaced by:
-/// %a = vector.strided_slice %0
-/// %b = vector.strided_slice %0
-/// ..
-/// %x = vector.tuple %a, %b, ..
-class ExtractSlicesOpLowering
- : public OpRewritePattern<vector::ExtractSlicesOp> {
-public:
- using OpRewritePattern<vector::ExtractSlicesOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ExtractSlicesOp op,
- PatternRewriter &rewriter) const override {
- auto loc = op.getLoc();
-
- VectorType vectorType = op.getSourceVectorType();
- auto shape = vectorType.getShape();
-
- SmallVector<int64_t, 4> sizes;
- op.getSizes(sizes);
- SmallVector<int64_t, 4> strides;
- op.getStrides(strides); // all-ones at the moment
-
- // For each element in the tuple, generate the proper strided slice.
- TupleType tupleType = op.getResultTupleType();
- int64_t tupleSize = tupleType.size();
- SmallVector<Value, 4> tupleValues(tupleSize);
- auto sliceStrides = computeStrides(shape, sizes);
- for (int64_t i = 0; i < tupleSize; ++i) {
- auto vectorOffsets = delinearize(sliceStrides, i);
- auto elementOffsets =
- computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
- auto sliceSizes = computeSliceSizes(shape, sizes, elementOffsets);
- // Insert in tuple.
- tupleValues[i] = rewriter.create<vector::StridedSliceOp>(
- loc, op.vector(), elementOffsets, sliceSizes, strides);
- }
-
- rewriter.replaceOpWithNewOp<vector::TupleOp>(op, tupleType, tupleValues);
- return matchSuccess();
- }
-};
-
-/// Progressive lowering of InsertSlicesOp to series of InsertStridedSliceOp.
-/// One:
-/// %x = vector.insert_slices %0
-/// is replaced by:
-/// %r0 = vector.splat 0
-/// %t1 = vector.tuple_get %0, 0
-/// %r1 = vector.insert_strided_slice %r0, %t1
-/// %t2 = vector.tuple_get %0, 1
-/// %r2 = vector.insert_strided_slice %r1, %t2
-/// ..
-/// %x = ..
-class InsertSlicesOpLowering : public OpRewritePattern<vector::InsertSlicesOp> {
-public:
- using OpRewritePattern<vector::InsertSlicesOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::InsertSlicesOp op,
- PatternRewriter &rewriter) const override {
- auto loc = op.getLoc();
-
- VectorType vectorType = op.getResultVectorType();
- auto shape = vectorType.getShape();
-
- SmallVector<int64_t, 4> sizes;
- op.getSizes(sizes);
- SmallVector<int64_t, 4> strides;
- op.getStrides(strides); // all-ones at the moment
-
- // Prepare result.
- auto elemType = vectorType.getElementType();
- Value zero = rewriter.create<ConstantOp>(loc, elemType,
- rewriter.getZeroAttr(elemType));
- Value result = rewriter.create<SplatOp>(loc, vectorType, zero);
-
- // For each element in the tuple, extract the proper strided slice.
- TupleType tupleType = op.getSourceTupleType();
- int64_t tupleSize = tupleType.size();
- auto sliceStrides = computeStrides(shape, sizes);
- for (int64_t i = 0; i < tupleSize; ++i) {
- auto vectorOffsets = delinearize(sliceStrides, i);
- auto elementOffsets =
- computeElementOffsetsFromVectorSliceOffsets(sizes, vectorOffsets);
- // Extract from tuple into the result.
- auto index = rewriter.getI64IntegerAttr(i);
- auto tupleGet = rewriter.create<vector::TupleGetOp>(
- loc, tupleType.getType(i), op.getOperand(), index);
- result = rewriter.create<vector::InsertStridedSliceOp>(
- loc, tupleGet, result, elementOffsets, strides);
- }
-
- rewriter.replaceOp(op, result);
- return matchSuccess();
- }
-};
-
-/// Progressive lowering of OuterProductOp.
-/// One:
-/// %x = vector.outerproduct %lhs, %rhs, %acc
-/// is replaced by:
-/// %z = zero vector of the result type
-/// %0 = vector.extract %lhs[0]
-/// %1 = vector.broadcast %0
-/// %2 = vector.extract %acc[0]
-/// %3 = vector.fma %1, %rhs, %2
-/// %4 = vector.insert %3, %z[0]
-/// ..
-/// %x = vector.insert %.., %..[N-1]
-///
-class OuterProductOpLowering : public OpRewritePattern<vector::OuterProductOp> {
-public:
- using OpRewritePattern<vector::OuterProductOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::OuterProductOp op,
- PatternRewriter &rewriter) const override {
- auto loc = op.getLoc();
-
- VectorType rhsType = op.getOperandVectorTypeRHS();
- VectorType resType = op.getVectorType();
- Type eltType = resType.getElementType();
- Value acc = (op.acc().empty()) ? nullptr : op.acc()[0];
-
- Value zero = rewriter.create<ConstantOp>(loc, eltType,
- rewriter.getZeroAttr(eltType));
- Value result = rewriter.create<SplatOp>(loc, resType, zero);
- for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) {
- auto pos = rewriter.getI64ArrayAttr(d);
- Value x = rewriter.create<vector::ExtractOp>(loc, eltType, op.lhs(), pos);
- Value b = rewriter.create<vector::BroadcastOp>(loc, rhsType, x);
- Value m;
- if (acc) {
- Value z = rewriter.create<vector::ExtractOp>(loc, rhsType, acc, pos);
- m = rewriter.create<vector::FMAOp>(loc, b, op.rhs(), z);
- } else {
- m = rewriter.create<MulFOp>(loc, b, op.rhs());
- }
- result = rewriter.create<vector::InsertOp>(loc, resType, m, result, pos);
- }
- rewriter.replaceOp(op, result);
- return matchSuccess();
- }
-};
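// The scalar semantics that the lowering above implements, as a
// self-contained sketch: each result row is lhs[d] * rhs (+ the matching acc
// row, when present), which is what the per-row broadcast + fma expansion
// computes. Values here are hypothetical.

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<float> lhs = {1, 2};       // vector<2xf32>
  std::vector<float> rhs = {10, 20, 30}; // vector<3xf32>
  std::vector<std::vector<float>> acc = {{1, 1, 1}, {1, 1, 1}};
  std::vector<std::vector<float>> result(2, std::vector<float>(3));
  for (size_t d = 0; d < lhs.size(); ++d)  // One broadcast + fma per row.
    for (size_t j = 0; j < rhs.size(); ++j)
      result[d][j] = lhs[d] * rhs[j] + acc[d][j];
  assert(result[0][2] == 31 && result[1][0] == 21);
}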
-
-/// Progressive lowering of ContractionOp.
-/// One:
-/// %x = vector.contract with at least one free/batch dimension
-/// is replaced by:
-/// %a = vector.contract with one less free/batch dimension
-/// %b = vector.contract with one less free/batch dimension
-/// ..
-/// %x = combine %a %b ..
-/// until a pure contraction is reached (no free/batch dimensions),
-/// which is replaced by a fma/reduction op.
-///
-/// TODO(ajcbik): break down into transpose/reshape/cast ops
-/// when they become available to avoid code dup
-/// TODO(ajcbik): investigate lowering order impact on performance
-class ContractionOpLowering : public OpRewritePattern<vector::ContractionOp> {
-public:
- using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ContractionOp op,
- PatternRewriter &rewriter) const override {
- // TODO(ajcbik): implement masks
- if (llvm::size(op.masks()) != 0)
- return matchFailure();
-
- // TODO(ntv, ajcbik): implement benefits, cost models, separate this out in
- // a new pattern.
- // TODO(ntv, fhahn): once row-major mode is available in LLVM's matrix
- // intrinsics, use that.
- if (lowerToLLVMMatrixIntrinsics &&
- isColumnMajorMatmul(op.indexing_maps())) {
- VectorType lhsType = op.getLhsType();
- VectorType rhsType = op.getRhsType();
- Type flattenedLHSType =
- VectorType::get(lhsType.getNumElements(), lhsType.getElementType());
- Type flattenedRHSType =
- VectorType::get(rhsType.getNumElements(), rhsType.getElementType());
- auto lhs = rewriter.create<vector::ShapeCastOp>(
- op.getLoc(), flattenedLHSType, op.lhs());
- auto rhs = rewriter.create<vector::ShapeCastOp>(
- op.getLoc(), flattenedRHSType, op.rhs());
-
- unsigned lhsRows = op.getLhsType().getShape()[0];
- unsigned lhsColumns = op.getLhsType().getShape()[1];
- unsigned rhsColumns = op.getRhsType().getShape()[1];
- Value mul = rewriter.create<vector::MatmulOp>(
- op.getLoc(), lhs, rhs, lhsRows, lhsColumns, rhsColumns);
- mul = rewriter.create<vector::ShapeCastOp>(op.getLoc(),
- op.acc().getType(), mul);
- Type elementType = op.getLhsType().getElementType();
- assert(elementType.isIntOrFloat());
- if (elementType.isa<IntegerType>())
- rewriter.replaceOpWithNewOp<AddIOp>(op, op.acc(), mul);
- else
- rewriter.replaceOpWithNewOp<AddFOp>(op, op.acc(), mul);
- return matchSuccess();
- }
-
- // Find first batch dimension in LHS/RHS, and lower when found.
- std::vector<std::pair<int64_t, int64_t>> batchDimMap = op.getBatchDimMap();
- if (!batchDimMap.empty()) {
- int64_t lhsIndex = batchDimMap[0].first;
- int64_t rhsIndex = batchDimMap[0].second;
- rewriter.replaceOp(op, lowerParallel(op, lhsIndex, rhsIndex, rewriter));
- return matchSuccess();
- }
-
- // Collect contracting dimensions.
- std::vector<std::pair<int64_t, int64_t>> contractingDimMap =
- op.getContractingDimMap();
- DenseSet<int64_t> lhsContractingDimSet;
- DenseSet<int64_t> rhsContractingDimSet;
- for (auto &dimPair : contractingDimMap) {
- lhsContractingDimSet.insert(dimPair.first);
- rhsContractingDimSet.insert(dimPair.second);
- }
-
- // Find first free dimension in LHS, and lower when found.
- VectorType lhsType = op.getLhsType();
- for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e;
- ++lhsIndex) {
- if (lhsContractingDimSet.count(lhsIndex) == 0) {
- rewriter.replaceOp(
- op, lowerParallel(op, lhsIndex, /*rhsIndex=*/-1, rewriter));
- return matchSuccess();
- }
- }
-
- // Find first free dimension in RHS, and lower when found.
- VectorType rhsType = op.getRhsType();
- for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e;
- ++rhsIndex) {
- if (rhsContractingDimSet.count(rhsIndex) == 0) {
- rewriter.replaceOp(
- op, lowerParallel(op, /*lhsIndex=*/-1, rhsIndex, rewriter));
- return matchSuccess();
- }
- }
-
- // Lower the first remaining reduction dimension.
- if (!contractingDimMap.empty()) {
- rewriter.replaceOp(op, lowerReduction(op, rewriter));
- return matchSuccess();
- }
-
- return matchFailure();
- }
-
-private:
- // Lower one parallel dimension.
- // TODO(ajcbik): consider reusing existing contract unrolling
- Value lowerParallel(vector::ContractionOp op, int64_t lhsIndex,
- int64_t rhsIndex, PatternRewriter &rewriter) const {
- VectorType lhsType = op.getLhsType();
- VectorType rhsType = op.getRhsType();
- VectorType resType = op.getResultType().cast<VectorType>();
- // Find the iterator type index and result index.
- SmallVector<AffineMap, 4> iMap = op.getIndexingMaps();
- int64_t iterIndex = -1;
- int64_t dimSize = -1;
- if (lhsIndex >= 0) {
- iterIndex =
- iMap[0].getResult(lhsIndex).cast<AffineDimExpr>().getPosition();
- assert((rhsIndex < 0 || iterIndex == iMap[1]
- .getResult(rhsIndex)
- .cast<AffineDimExpr>()
- .getPosition()) &&
- "parallel index should be free in LHS or batch in LHS/RHS");
- dimSize = lhsType.getDimSize(lhsIndex);
- } else {
- assert(rhsIndex >= 0 && "missing parallel index");
- iterIndex =
- iMap[1].getResult(rhsIndex).cast<AffineDimExpr>().getPosition();
- dimSize = rhsType.getDimSize(rhsIndex);
- }
- assert(iterIndex >= 0 && "parallel index not listed in operand mapping");
- Optional<int64_t> lookup = getResultIndex(iMap[2], iterIndex);
- assert(lookup.hasValue() && "parallel index not listed in reduction");
- int64_t resIndex = lookup.getValue();
- // Construct new iterator types and affine map array attribute.
- SmallVector<AffineMap, 4> lowIndexingMaps;
- lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
- lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
- lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
- auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
- auto lowIter =
- rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));
- // Unroll into a series of lower dimensional vector.contract ops.
- Location loc = op.getLoc();
- Value result = zeroVector(loc, resType, rewriter);
- for (int64_t d = 0; d < dimSize; ++d) {
- auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter);
- auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter);
- auto acc = reshapeLoad(loc, op.acc(), resType, resIndex, d, rewriter);
- Value lowContract = rewriter.create<vector::ContractionOp>(
- loc, lhs, rhs, acc, lowAffine, lowIter);
- result = reshapeStore(loc, lowContract, result, resType, resIndex, d,
- rewriter);
- }
- return result;
- }
-
- // Lower one reduction dimension.
- Value lowerReduction(vector::ContractionOp op,
- PatternRewriter &rewriter) const {
- auto loc = op.getLoc();
- VectorType lhsType = op.getLhsType();
- VectorType rhsType = op.getRhsType();
- Type resType = op.getResultType();
- assert(!resType.isa<VectorType>());
- // Use iterator index 0.
- int64_t iterIndex = 0;
- SmallVector<AffineMap, 4> iMap = op.getIndexingMaps();
- Optional<int64_t> lookupLhs = getResultIndex(iMap[0], iterIndex);
- Optional<int64_t> lookupRhs = getResultIndex(iMap[1], iterIndex);
- assert(lookupLhs.hasValue() && "missing LHS parallel index");
- assert(lookupRhs.hasValue() && "missing RHS parallel index");
- int64_t lhsIndex = lookupLhs.getValue();
- int64_t rhsIndex = lookupRhs.getValue();
- int64_t dimSize = lhsType.getDimSize(lhsIndex);
- assert(dimSize == rhsType.getDimSize(rhsIndex) && "corrupt shape");
- // Base case.
- if (lhsType.getRank() == 1) {
- assert(rhsType.getRank() == 1 && "corrupt contraction");
- Value zero = zeroVector(loc, lhsType, rewriter);
- Value fma = rewriter.create<vector::FMAOp>(loc, op.lhs(), op.rhs(), zero);
- StringAttr kind = rewriter.getStringAttr("add");
- return rewriter.create<vector::ReductionOp>(loc, resType, kind, fma,
- op.acc());
- }
- // Construct new iterator types and affine map array attribute.
- SmallVector<AffineMap, 4> lowIndexingMaps;
- lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
- lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
- lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
- auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
- auto lowIter =
- rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));
- // Unroll into a series of lower dimensional vector.contract ops.
- // By feeding the initial accumulator into the first contraction,
- // and the result of each contraction into the next, eventually
- // the sum of all reductions is computed.
- Value result = op.acc();
- for (int64_t d = 0; d < dimSize; ++d) {
- auto lhs = reshapeLoad(loc, op.lhs(), lhsType, lhsIndex, d, rewriter);
- auto rhs = reshapeLoad(loc, op.rhs(), rhsType, rhsIndex, d, rewriter);
- result = rewriter.create<vector::ContractionOp>(loc, lhs, rhs, result,
- lowAffine, lowIter);
- }
- return result;
- }
-
- // Helper method to construct a zero vector.
- static Value zeroVector(Location loc, VectorType vType,
- PatternRewriter &rewriter) {
- Type eltType = vType.getElementType();
- Value zero = rewriter.create<ConstantOp>(loc, eltType,
- rewriter.getZeroAttr(eltType));
- return rewriter.create<SplatOp>(loc, vType, zero);
- }
-
- // Helper to find an index in an affine map.
- static Optional<int64_t> getResultIndex(AffineMap map, int64_t index) {
- for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
- int64_t idx = map.getResult(i).cast<AffineDimExpr>().getPosition();
- if (idx == index)
- return i;
- }
- return None;
- }
-
- // Helper to construct iterator types with one index removed.
- static SmallVector<Attribute, 4> adjustIter(ArrayAttr iteratorTypes,
- int64_t index) {
- SmallVector<Attribute, 4> results;
- for (auto it : llvm::enumerate(iteratorTypes)) {
- int64_t idx = it.index();
- if (idx == index)
- continue;
- results.push_back(it.value());
- }
- return results;
- }
-
- // Helper to construct an affine map with one index removed.
- static AffineMap adjustMap(AffineMap map, int64_t index,
- PatternRewriter &rewriter) {
- auto *ctx = rewriter.getContext();
- SmallVector<AffineExpr, 4> results;
- for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
- int64_t idx = map.getResult(i).cast<AffineDimExpr>().getPosition();
- if (idx == index)
- continue;
- // Re-insert remaining indices, but renamed when occurring
- // after the removed index.
- auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx);
- results.push_back(targetExpr);
- }
- // The (...) -> () affine map has its own factory method.
- return results.empty() ? AffineMap::get(map.getNumDims() - 1, 0, ctx)
- : AffineMap::get(map.getNumDims() - 1, 0, results);
- }
-
- // Helper to drop dimension from vector type.
- static Type adjustType(VectorType tp, int64_t index) {
- int64_t rank = tp.getRank();
- Type eltType = tp.getElementType();
- if (rank == 1) {
- assert(index == 0 && "index for scalar result out of bounds");
- return eltType;
- }
- SmallVector<int64_t, 4> adjustedShape;
- for (int64_t i = 0; i < rank; ++i) {
- // Omit dimension at the given index.
- if (i == index)
- continue;
- // Otherwise, add dimension back.
- adjustedShape.push_back(tp.getDimSize(i));
- }
- return VectorType::get(adjustedShape, eltType);
- }
-
- // Helper method to possibly drop a dimension in a load.
- // TODO(ajcbik): use a reshaping vector load (and share lowering code)
- static Value reshapeLoad(Location loc, Value val, VectorType type,
- int64_t index, int64_t pos,
- PatternRewriter &rewriter) {
- if (index == -1)
- return val;
- Type lowType = adjustType(type, 0);
- // At extraction dimension?
- if (index == 0) {
- auto posAttr = rewriter.getI64ArrayAttr(pos);
- return rewriter.create<vector::ExtractOp>(loc, lowType, val, posAttr);
- }
- // Unroll leading dimensions.
- VectorType vType = lowType.cast<VectorType>();
- VectorType resType = adjustType(type, index).cast<VectorType>();
- Value result = zeroVector(loc, resType, rewriter);
- for (int64_t d = 0, e = resType.getDimSize(0); d < e; d++) {
- auto posAttr = rewriter.getI64ArrayAttr(d);
- Value ext = rewriter.create<vector::ExtractOp>(loc, vType, val, posAttr);
- Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter);
- result = rewriter.create<vector::InsertOp>(loc, resType, load, result,
- posAttr);
- }
- return result;
- }
-
- // Helper method to possibly drop a dimension in a store.
- // TODO(ajcbik): use a reshaping vector store (and share lowering code)
- static Value reshapeStore(Location loc, Value val, Value result,
- VectorType type, int64_t index, int64_t pos,
- PatternRewriter &rewriter) {
- // Unmodified?
- if (index == -1)
- return val;
- // At insertion dimension?
- if (index == 0) {
- auto posAttr = rewriter.getI64ArrayAttr(pos);
- return rewriter.create<vector::InsertOp>(loc, type, val, result, posAttr);
- }
- // Unroll leading dimensions.
- Type lowType = adjustType(type, 0);
- VectorType vType = lowType.cast<VectorType>();
- Type insType = adjustType(vType, 0);
- for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) {
- auto posAttr = rewriter.getI64ArrayAttr(d);
- Value ext =
- rewriter.create<vector::ExtractOp>(loc, vType, result, posAttr);
- Value ins =
- rewriter.create<vector::ExtractOp>(loc, insType, val, posAttr);
- Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter);
- result =
- rewriter.create<vector::InsertOp>(loc, type, sto, result, posAttr);
- }
- return result;
- }
-};
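// A standalone sketch (hypothetical name) of the index renumbering performed
// by the adjustMap helper above: dropping one result position shifts every
// later dimension down by one.

#include <cassert>
#include <vector>

static std::vector<int> removeAndRenumber(const std::vector<int> &positions,
                                          int removed) {
  std::vector<int> out;
  for (int idx : positions) {
    if (idx == removed)
      continue; // Drop the removed index.
    out.push_back(idx < removed ? idx : idx - 1); // Shift later dims down.
  }
  return out;
}

int main() {
  // Removing d1 from (d0, d1, d2) keeps {0, 2} and renames 2 to 1.
  assert((removeAndRenumber({0, 1, 2}, 1) == std::vector<int>{0, 1}));
}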
-
-/// ShapeCastOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D
-/// vectors progressively on the way to target llvm.matrix intrinsics.
-/// This iterates over the most major dimension of the 2-D vector and performs
-/// rewrites into:
-/// vector.extract from 2-D + vector.insert_strided_slice offset into 1-D
-class ShapeCastOp2DDownCastRewritePattern
- : public OpRewritePattern<vector::ShapeCastOp> {
-public:
- using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ShapeCastOp op,
- PatternRewriter &rewriter) const override {
- auto sourceVectorType = op.getSourceVectorType();
- auto resultVectorType = op.getResultVectorType();
- if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1)
- return matchFailure();
-
- auto loc = op.getLoc();
- auto elemType = sourceVectorType.getElementType();
- Value zero = rewriter.create<ConstantOp>(loc, elemType,
- rewriter.getZeroAttr(elemType));
- Value desc = rewriter.create<SplatOp>(loc, resultVectorType, zero);
- unsigned mostMinorVectorSize = sourceVectorType.getShape()[1];
- for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) {
- Value vec = rewriter.create<vector::ExtractOp>(loc, op.source(), i);
- desc = rewriter.create<vector::InsertStridedSliceOp>(
- loc, vec, desc,
- /*offsets=*/i * mostMinorVectorSize, /*strides=*/1);
- }
- rewriter.replaceOp(op, desc);
- return matchSuccess();
- }
-};
-
-/// ShapeCastOp 1D -> 2D upcast serves the purpose of unflattening 1-D back
-/// into 2-D vectors progressively on the way back from targeting llvm.matrix
-/// intrinsics.
-/// This iterates over the most major dimension of the 2-D vector and performs
-/// rewrites into:
-/// vector.strided_slice from 1-D + vector.insert into 2-D
-class ShapeCastOp2DUpCastRewritePattern
- : public OpRewritePattern<vector::ShapeCastOp> {
-public:
- using OpRewritePattern<vector::ShapeCastOp>::OpRewritePattern;
-
- PatternMatchResult matchAndRewrite(vector::ShapeCastOp op,
- PatternRewriter &rewriter) const override {
- auto sourceVectorType = op.getSourceVectorType();
- auto resultVectorType = op.getResultVectorType();
- if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2)
- return matchFailure();
-
- auto loc = op.getLoc();
- auto elemType = sourceVectorType.getElementType();
- Value zero = rewriter.create<ConstantOp>(loc, elemType,
- rewriter.getZeroAttr(elemType));
- Value desc = rewriter.create<SplatOp>(loc, resultVectorType, zero);
- unsigned mostMinorVectorSize = resultVectorType.getShape()[1];
- for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) {
- Value vec = rewriter.create<vector::StridedSliceOp>(
- loc, op.source(), /*offsets=*/i * mostMinorVectorSize,
- /*sizes=*/mostMinorVectorSize,
- /*strides=*/1);
- desc = rewriter.create<vector::InsertOp>(loc, vec, desc, i);
- }
- rewriter.replaceOp(op, desc);
- return matchSuccess();
- }
-};
-
-} // namespace
-
-// TODO(andydavis) Add pattern to rewrite ExtractSlices(ConstantMaskOp).
-// TODO(andydavis) Add this as a DRR pattern.
-void mlir::vector::populateVectorToVectorTransformationPatterns(
- OwningRewritePatternList &patterns, MLIRContext *context) {
- patterns.insert<ShapeCastOpDecomposer, ShapeCastOpFolder, SplitTransferReadOp,
- SplitTransferWriteOp, TupleGetFolderOp>(context);
-}
-
-void mlir::vector::populateVectorSlicesLoweringPatterns(
- OwningRewritePatternList &patterns, MLIRContext *context) {
- patterns.insert<ExtractSlicesOpLowering, InsertSlicesOpLowering>(context);
-}
-
-void mlir::vector::populateVectorContractLoweringPatterns(
- OwningRewritePatternList &patterns, MLIRContext *context) {
- patterns.insert<ContractionOpLowering, ShapeCastOp2DDownCastRewritePattern,
- ShapeCastOp2DUpCastRewritePattern, OuterProductOpLowering>(
- context);
-}
diff --git a/mlir/lib/Dialect/VectorOps/VectorUtils.cpp b/mlir/lib/Dialect/VectorOps/VectorUtils.cpp
deleted file mode 100644
index 46a990080a4f..000000000000
--- a/mlir/lib/Dialect/VectorOps/VectorUtils.cpp
+++ /dev/null
@@ -1,278 +0,0 @@
-//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
-//
-// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements utility methods for working with the VectorOps dialect.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/VectorOps/VectorUtils.h"
-#include "mlir/Analysis/LoopAnalysis.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/VectorOps/VectorOps.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/IntegerSet.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/Support/Functional.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Support/MathExtras.h"
-#include "mlir/Support/STLExtras.h"
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
-
-using llvm::SetVector;
-
-namespace mlir {
-
-SmallVector<int64_t, 4> computeStrides(ArrayRef<int64_t> shape,
- ArrayRef<int64_t> sizes) {
- int64_t rank = shape.size();
- // Compute the count for each dimension.
- SmallVector<int64_t, 4> sliceDimCounts(rank);
- for (int64_t r = 0; r < rank; ++r)
- sliceDimCounts[r] = ceilDiv(shape[r], sizes[r]);
- // Use that to compute the slice stride for each dimension.
- SmallVector<int64_t, 4> sliceStrides(rank);
- sliceStrides[rank - 1] = 1;
- for (int64_t r = rank - 2; r >= 0; --r)
- sliceStrides[r] = sliceStrides[r + 1] * sliceDimCounts[r + 1];
- return sliceStrides;
-}
-
-SmallVector<int64_t, 4> delinearize(ArrayRef<int64_t> sliceStrides,
- int64_t index) {
- int64_t rank = sliceStrides.size();
- SmallVector<int64_t, 4> vectorOffsets(rank);
- for (int64_t r = 0; r < rank; ++r) {
- assert(sliceStrides[r] > 0);
- vectorOffsets[r] = index / sliceStrides[r];
- index %= sliceStrides[r];
- }
- return vectorOffsets;
-}
-
-SmallVector<int64_t, 4>
-computeElementOffsetsFromVectorSliceOffsets(ArrayRef<int64_t> sizes,
- ArrayRef<int64_t> vectorOffsets) {
- return functional::zipMap([](int64_t v1, int64_t v2) { return v1 * v2; },
- vectorOffsets, sizes);
-}
-
-SmallVector<int64_t, 4> computeSliceSizes(ArrayRef<int64_t> shape,
- ArrayRef<int64_t> sizes,
- ArrayRef<int64_t> elementOffsets) {
- int64_t rank = shape.size();
- SmallVector<int64_t, 4> sliceSizes(rank);
- for (unsigned r = 0; r < rank; ++r)
- sliceSizes[r] = std::min(sizes[r], shape[r] - elementOffsets[r]);
- return sliceSizes;
-}
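// A worked, self-contained pass (hypothetical names) over the utilities above
// for a boundary case: slicing shape [5] with sizes [2] gives
// ceilDiv(5, 2) = 3 slices, and computeSliceSizes clamps the last one to 1.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  int64_t shape = 5, size = 2;
  int64_t sliceCount = (shape + size - 1) / size; // ceilDiv -> 3 slices.
  for (int64_t i = 0; i < sliceCount; ++i) {
    int64_t elementOffset = i * size;                          // 0, 2, 4
    int64_t sliceSize = std::min(size, shape - elementOffset); // 2, 2, 1
    assert(sliceSize > 0 && elementOffset + sliceSize <= shape);
  }
}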
-
-Optional<SmallVector<int64_t, 4>> shapeRatio(ArrayRef<int64_t> superShape,
- ArrayRef<int64_t> subShape) {
- if (superShape.size() < subShape.size()) {
- return Optional<SmallVector<int64_t, 4>>();
- }
-
- // Starting from the end, compute the integer divisors.
- // Clear the boolean `divides` if integral division is not possible.
- std::vector<int64_t> result;
- result.reserve(superShape.size());
- bool divides = true;
- auto divide = [&divides, &result](int superSize, int subSize) {
- assert(superSize > 0 && "superSize must be > 0");
- assert(subSize > 0 && "subSize must be > 0");
- divides &= (superSize % subSize == 0);
- result.push_back(superSize / subSize);
- };
- functional::zipApply(
- divide, SmallVector<int64_t, 8>{superShape.rbegin(), superShape.rend()},
- SmallVector<int64_t, 8>{subShape.rbegin(), subShape.rend()});
-
- // If integral division does not occur, return and let the caller decide.
- if (!divides) {
- return None;
- }
-
- // At this point we computed the ratio (in reverse) for the common
- // size. Fill with the remaining entries from the super-vector shape (still in
- // reverse).
- int commonSize = subShape.size();
- std::copy(superShape.rbegin() + commonSize, superShape.rend(),
- std::back_inserter(result));
-
- assert(result.size() == superShape.size() &&
- "super to sub shape ratio is not of the same size as the super rank");
-
- // Reverse again to get it back in the proper order and return.
- return SmallVector<int64_t, 4>{result.rbegin(), result.rend()};
-}
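// A hypothetical standalone rendering of the trailing-aligned ratio
// computation above, written forward instead of in reverse:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

static std::optional<std::vector<int64_t>>
ratioSketch(const std::vector<int64_t> &super,
            const std::vector<int64_t> &sub) {
  if (super.size() < sub.size())
    return std::nullopt;
  size_t offset = super.size() - sub.size();
  // Leading super dimensions keep their full size as the ratio.
  std::vector<int64_t> result(super.begin(), super.end());
  for (size_t i = 0; i < sub.size(); ++i) {
    if (super[offset + i] % sub[i] != 0)
      return std::nullopt; // Non-integral ratio: let the caller decide.
    result[offset + i] = super[offset + i] / sub[i];
  }
  return result;
}

int main() {
  assert((*ratioSketch({4, 8, 16}, {8, 16}) == std::vector<int64_t>{4, 1, 1}));
  assert(!ratioSketch({4, 6}, {4}).has_value()); // 6 % 4 != 0.
}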
-
-Optional<SmallVector<int64_t, 4>> shapeRatio(VectorType superVectorType,
- VectorType subVectorType) {
- assert(superVectorType.getElementType() == subVectorType.getElementType() &&
- "vector types must be of the same elemental type");
- return shapeRatio(superVectorType.getShape(), subVectorType.getShape());
-}
-
-/// Constructs a permutation map from memref indices to vector dimension.
-///
-/// The implementation uses the knowledge of the mapping of enclosing loop to
-/// vector dimension. `enclosingLoopToVectorDim` carries this information as a
-/// map with:
-/// - keys representing "vectorized enclosing loops";
-/// - values representing the corresponding vector dimension.
-/// The algorithm traverses "vectorized enclosing loops" and extracts the
-/// at-most-one MemRef index that is invariant along said loop. By
-/// construction there is at most one such index: otherwise the MemRef is not
-/// vectorizable.
-/// If this invariant index is found, it is added to the permutation_map at the
-/// proper vector dimension.
-/// If no index is found to be invariant, 0 is added to the permutation_map and
-/// corresponds to a vector broadcast along that dimension.
-///
-/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
-/// signalling that no permutation map can be constructed given
-/// `enclosingLoopToVectorDim`.
-///
-/// Examples can be found in the documentation of `makePermutationMap`, in the
-/// header file.
-static AffineMap makePermutationMap(
- ArrayRef<Value> indices,
- const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
- if (enclosingLoopToVectorDim.empty())
- return AffineMap();
- MLIRContext *context =
- enclosingLoopToVectorDim.begin()->getFirst()->getContext();
- using functional::makePtrDynCaster;
- using functional::map;
- SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
- getAffineConstantExpr(0, context));
-
- for (auto kvp : enclosingLoopToVectorDim) {
- assert(kvp.second < perm.size());
- auto invariants = getInvariantAccesses(
- cast<AffineForOp>(kvp.first).getInductionVar(), indices);
- unsigned numIndices = indices.size();
- unsigned countInvariantIndices = 0;
- for (unsigned dim = 0; dim < numIndices; ++dim) {
- if (!invariants.count(indices[dim])) {
- assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
- "permutationMap already has an entry along dim");
- perm[kvp.second] = getAffineDimExpr(dim, context);
- } else {
- ++countInvariantIndices;
- }
- }
- assert((countInvariantIndices == numIndices ||
- countInvariantIndices == numIndices - 1) &&
- "Vectorization prerequisite violated: at most 1 index may be "
- "invariant wrt a vectorized loop");
- }
- return AffineMap::get(indices.size(), 0, perm);
-}
-
-/// Implementation detail that walks up the parents and records the ones with
-/// the specified type.
-/// TODO(ntv): could also be implemented as a collect parents followed by a
-/// filter and made available outside this file.
-template <typename T>
-static SetVector<Operation *> getParentsOfType(Operation *op) {
- SetVector<Operation *> res;
- auto *current = op;
- while (auto *parent = current->getParentOp()) {
- if (auto typedParent = dyn_cast<T>(parent)) {
- assert(res.count(parent) == 0 && "Already inserted");
- res.insert(parent);
- }
- current = parent;
- }
- return res;
-}
-
-/// Returns the enclosing AffineForOp, from closest to farthest.
-static SetVector<Operation *> getEnclosingForOps(Operation *op) {
- return getParentsOfType<AffineForOp>(op);
-}
-
-AffineMap
-makePermutationMap(Operation *op, ArrayRef<Value> indices,
- const DenseMap<Operation *, unsigned> &loopToVectorDim) {
- DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
- auto enclosingLoops = getEnclosingForOps(op);
- for (auto *forInst : enclosingLoops) {
- auto it = loopToVectorDim.find(forInst);
- if (it != loopToVectorDim.end()) {
- enclosingLoopToVectorDim.insert(*it);
- }
- }
- return makePermutationMap(indices, enclosingLoopToVectorDim);
-}
-
-bool matcher::operatesOnSuperVectorsOf(Operation &op,
- VectorType subVectorType) {
- // First, extract the vector type and distinguish between:
- // a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
- // vector.transfer_write); and
- // b. ops that *may* lower a super-vector (all other ops).
- // The ops that *may* lower a super-vector only do so if the super-vector to
- // sub-vector ratio exists. The ops that *must* lower a super-vector are
- // explicitly checked for this property.
- /// TODO(ntv): there should be a single function for all ops to do this so we
- /// do not have to special case. Maybe a trait, or just a method, unclear atm.
- bool mustDivide = false;
- (void)mustDivide;
- VectorType superVectorType;
- if (auto read = dyn_cast<vector::TransferReadOp>(op)) {
- superVectorType = read.getVectorType();
- mustDivide = true;
- } else if (auto write = dyn_cast<vector::TransferWriteOp>(op)) {
- superVectorType = write.getVectorType();
- mustDivide = true;
- } else if (op.getNumResults() == 0) {
- if (!isa<ReturnOp>(op)) {
- op.emitError("NYI: assuming only return operations can have 0 "
- " results at this point");
- }
- return false;
- } else if (op.getNumResults() == 1) {
- if (auto v = op.getResult(0).getType().dyn_cast<VectorType>()) {
- superVectorType = v;
- } else {
- // Not a vector type.
- return false;
- }
- } else {
- // Not a vector.transfer and has more than 1 result; fail hard for now to
- // wake us up when something changes.
- op.emitError("NYI: operation has more than 1 result");
- return false;
- }
-
- // Get the ratio.
- auto ratio = shapeRatio(superVectorType, subVectorType);
-
- // Sanity check.
- assert((ratio.hasValue() || !mustDivide) &&
- "vector.transfer operation in which super-vector size is not an"
- " integer multiple of sub-vector size");
-
- // This catches the remaining cases: ops that are not required to divide
- // evenly but whose shape still isn't divisible by the sub-vector shape.
- // This could be useful information if we wanted to reshape at the level of
- // the vector type (but we would have to look at the compute and distinguish
- // between parallel, reduction and possibly other cases).
- if (!ratio.hasValue()) {
- return false;
- }
-
- return true;
-}
-
-} // namespace mlir