[Mlir-commits] [mlir] 0f24163 - [mlir] Replace vector-to-scf with progressive-vector-to-scf
Matthias Springer
llvmlistbot at llvm.org
Thu May 13 07:27:40 PDT 2021
Author: Matthias Springer
Date: 2021-05-13T23:27:31+09:00
New Revision: 0f24163870e1a633c1d79377fdd188fe03769dd8
URL: https://github.com/llvm/llvm-project/commit/0f24163870e1a633c1d79377fdd188fe03769dd8
DIFF: https://github.com/llvm/llvm-project/commit/0f24163870e1a633c1d79377fdd188fe03769dd8.diff
LOG: [mlir] Replace vector-to-scf with progressive-vector-to-scf
Depends On D102388
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D102101
Added:
Modified:
mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
mlir/test/lib/Transforms/TestVectorTransforms.cpp
Removed:
mlir/include/mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h
mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
mlir/test/Conversion/VectorToSCF/progressive-vector-to-loops.mlir
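For orientation before the diff: the entry points that survive this consolidation are `populateVectorToSCFConversionPatterns` and `VectorTransferToSCFOptions` (see VectorToSCF.h below). A minimal sketch of driving them from a function pass, modeled on the removed ConvertProgressiveVectorToSCFPass; the PassWrapper boilerplate here is illustrative only and is not part of this commit (the in-tree pass uses the generated ConvertVectorToSCFBase instead):

```cpp
// Illustrative sketch, not part of this commit.
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace {
struct LowerVectorTransfersPass
    : public mlir::PassWrapper<LowerVectorTransfersPass, mlir::FunctionPass> {
  void runOnFunction() override {
    mlir::RewritePatternSet patterns(getFunction().getContext());
    // unroll = false stages (N-1)-D vectors through a memref.alloca buffer;
    // unroll = true emits vector.extract/insert chains instead.
    auto options = mlir::VectorTransferToSCFOptions().setUnroll(false);
    mlir::populateVectorToSCFConversionPatterns(patterns, options);
    (void)mlir::applyPatternsAndFoldGreedily(getFunction(),
                                             std::move(patterns));
  }
};
} // namespace
```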
################################################################################
diff --git a/mlir/include/mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h b/mlir/include/mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h
deleted file mode 100644
index b69ec01a0d5c6..0000000000000
--- a/mlir/include/mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- ProgressiveVectorToSCF.h - Convert vector to SCF dialect -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_CONVERSION_VECTORTOSCF_PROGRESSIVEVECTORTOSCF_H_
-#define MLIR_CONVERSION_VECTORTOSCF_PROGRESSIVEVECTORTOSCF_H_
-
-#include "mlir/IR/PatternMatch.h"
-
-namespace mlir {
-class MLIRContext;
-class Pass;
-class RewritePatternSet;
-
-/// When lowering an N-d vector transfer op to an (N-1)-d vector transfer op,
-/// a temporary buffer is created through which individual (N-1)-d vectors are
-/// staged. This pattern can be applied multiple times, until the transfer op
-/// is 1-d.
-/// This is consistent with the lack of an LLVM instruction to dynamically
-/// index into an aggregate (see the Vector dialect lowering to LLVM deep dive).
-///
-/// An instruction such as:
-/// ```
-/// vector.transfer_write %vec, %A[%a, %b, %c] :
-/// vector<9x17x15xf32>, memref<?x?x?xf32>
-/// ```
-/// Lowers to pseudo-IR resembling (unpacking one dimension):
-/// ```
-/// %0 = alloca() : memref<vector<9x17x15xf32>>
-/// store %vec, %0[] : memref<vector<9x17x15xf32>>
-/// %1 = vector.type_cast %0 :
-/// memref<vector<9x17x15xf32>> to memref<9xvector<17x15xf32>>
-/// affine.for %I = 0 to 9 {
-/// %dim = dim %A, 0 : memref<?x?x?xf32>
-/// %add = affine.apply %I + %a
-/// %cmp = cmpi "slt", %add, %dim : index
-/// scf.if %cmp {
-/// %vec_2d = load %1[%I] : memref<9xvector<17x15xf32>>
-/// vector.transfer_write %vec_2d, %A[%add, %b, %c] :
-/// vector<17x15xf32>, memref<?x?x?xf32>
-/// ```
-///
-/// When applying the pattern a second time, the existing alloca() operation
-/// is reused and only a second vector.type_cast is added.
-
-struct ProgressiveVectorTransferToSCFOptions {
- bool unroll = false;
- ProgressiveVectorTransferToSCFOptions &setUnroll(bool u) {
- unroll = u;
- return *this;
- }
-};
-
-/// Collect a set of patterns to convert from the Vector dialect to SCF + std.
-void populateProgressiveVectorToSCFConversionPatterns(
- RewritePatternSet &patterns,
- const ProgressiveVectorTransferToSCFOptions &options =
- ProgressiveVectorTransferToSCFOptions());
-
-/// Create a pass to convert a subset of vector ops to SCF.
-std::unique_ptr<Pass> createProgressiveConvertVectorToSCFPass(
- const ProgressiveVectorTransferToSCFOptions &options =
- ProgressiveVectorTransferToSCFOptions());
-
-} // namespace mlir
-
-#endif // MLIR_CONVERSION_VECTORTOSCF_PROGRESSIVEVECTORTOSCF_H_
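The header comment above (retained verbatim in VectorToSCF.h below) describes applying the unpacking pattern repeatedly until the transfer is 1-D, reusing the alloca and adding one vector.type_cast per application. As a worked example of how the staging-buffer type evolves for the vector<9x17x15xf32> case, here is a small standalone C++ sketch (no MLIR dependency; it only prints the type strings, mirroring the unpackOneDim helper further down):

```cpp
// Standalone illustration of the buffer types produced by repeated unpacking:
//   memref<vector<9x17x15xf32>>
//   -> memref<9xvector<17x15xf32>>
//   -> memref<9x17xvector<15xf32>>   (element vector is 1-D: lowering stops)
#include <cstdio>
#include <string>
#include <vector>

static std::string bufferType(const std::vector<int> &memrefDims,
                              const std::vector<int> &vecDims) {
  std::string s = "memref<";
  for (int d : memrefDims)
    s += std::to_string(d) + "x";
  s += "vector<";
  for (size_t i = 0; i < vecDims.size(); ++i)
    s += std::to_string(vecDims[i]) + (i + 1 < vecDims.size() ? "x" : "xf32>>");
  return s;
}

int main() {
  std::vector<int> memrefDims;            // dims already unpacked
  std::vector<int> vecDims = {9, 17, 15}; // remaining vector dims
  std::printf("%s\n", bufferType(memrefDims, vecDims).c_str());
  while (vecDims.size() > 1) {            // stop at kTargetRank == 1
    memrefDims.push_back(vecDims.front());// unpack one (leading) dimension
    vecDims.erase(vecDims.begin());
    std::printf("%s\n", bufferType(memrefDims, vecDims).c_str());
  }
  return 0;
}
```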
diff --git a/mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h b/mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
index e8c7e651cc860..5a42b9a070f84 100644
--- a/mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
+++ b/mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h
@@ -1,4 +1,4 @@
-//===- VectorToSCF.h - Utils to convert from the vector dialect -*- C++ -*-===//
+//===- VectorToSCF.h - Convert vector to SCF dialect ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,57 +15,38 @@ namespace mlir {
class MLIRContext;
class Pass;
class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
-/// Control whether unrolling is used when lowering vector transfer ops to SCF.
+/// When lowering an N-d vector transfer op to an (N-1)-d vector transfer op,
+/// a temporary buffer is created through which individual (N-1)-d vectors are
+/// staged. This pattern can be applied multiple times, until the transfer op
+/// is 1-d.
+/// This is consistent with the lack of an LLVM instruction to dynamically
+/// index into an aggregate (see the Vector dialect lowering to LLVM deep dive).
///
-/// Case 1:
-/// =======
-/// When `unroll` is false, a temporary buffer is created through which
-/// individual 1-D vectors are staged. This is consistent with the lack of an
-/// LLVM instruction to dynamically index into an aggregate (see the Vector
-/// dialect lowering to LLVM deep dive).
/// An instruction such as:
/// ```
-/// vector.transfer_write %vec, %A[%base, %base] :
-/// vector<17x15xf32>, memref<?x?xf32>
+/// vector.transfer_write %vec, %A[%a, %b, %c] :
+/// vector<9x17x15xf32>, memref<?x?x?xf32>
/// ```
-/// Lowers to pseudo-IR resembling:
+/// Lowers to pseudo-IR resembling (unpacking one dimension):
/// ```
-/// %0 = alloc() : memref<17xvector<15xf32>>
+/// %0 = alloca() : memref<vector<9x17x15xf32>>
+/// store %vec, %0[] : memref<vector<9x17x15xf32>>
/// %1 = vector.type_cast %0 :
-/// memref<17xvector<15xf32>> to memref<vector<17x15xf32>>
-/// store %vec, %1[] : memref<vector<17x15xf32>>
-/// %dim = dim %A, 0 : memref<?x?xf32>
-/// affine.for %I = 0 to 17 {
-/// %add = affine.apply %I + %base
+/// memref<vector<9x17x15xf32>> to memref<9xvector<17x15xf32>>
+/// affine.for %I = 0 to 9 {
+/// %dim = dim %A, 0 : memref<?x?x?xf32>
+/// %add = affine.apply %I + %a
/// %cmp = cmpi "slt", %add, %dim : index
/// scf.if %cmp {
-/// %vec_1d = load %0[%I] : memref<17xvector<15xf32>>
-/// vector.transfer_write %vec_1d, %A[%add, %base] :
-/// vector<15xf32>, memref<?x?xf32>
+/// %vec_2d = load %1[%I] : memref<9xvector<17x15xf32>>
+/// vector.transfer_write %vec_2d, %A[%add, %b, %c] :
+/// vector<17x15xf32>, memref<?x?x?xf32>
/// ```
///
-/// Case 2:
-/// =======
-/// When `unroll` is true, the temporary buffer is skipped and static indices
-/// into aggregates can be used (see the Vector dialect lowering to LLVM deep
-/// dive).
-/// An instruction such as:
-/// ```
-/// vector.transfer_write %vec, %A[%base, %base] :
-/// vector<3x15xf32>, memref<?x?xf32>
-/// ```
-/// Lowers to pseudo-IR resembling:
-/// ```
-/// %0 = vector.extract %arg2[0] : vector<3x15xf32>
-/// vector.transfer_write %0, %arg0[%arg1, %arg1] : vector<15xf32>,
-/// memref<?x?xf32> %1 = affine.apply #map1()[%arg1] %2 = vector.extract
-/// %arg2[1] : vector<3x15xf32> vector.transfer_write %2, %arg0[%1, %arg1] :
-/// vector<15xf32>, memref<?x?xf32> %3 = affine.apply #map2()[%arg1] %4 =
-/// vector.extract %arg2[2] : vector<3x15xf32> vector.transfer_write %4,
-/// %arg0[%3, %arg1] : vector<15xf32>, memref<?x?xf32>
-/// ```
+/// When applying the pattern a second time, the existing alloca() operation
+/// is reused and only a second vector.type_cast is added.
+
struct VectorTransferToSCFOptions {
bool unroll = false;
VectorTransferToSCFOptions &setUnroll(bool u) {
@@ -74,93 +55,6 @@ struct VectorTransferToSCFOptions {
}
};
-/// Implements lowering of TransferReadOp and TransferWriteOp to a
-/// proper abstraction for the hardware.
-///
-/// There are multiple cases.
-///
-/// Case A: Permutation Map does not permute or broadcast.
-/// ======================================================
-///
-/// Progressive lowering occurs to 1-D vector transfer ops according to the
-/// description in `VectorTransferToSCFOptions`.
-///
-/// Case B: Permutation Map permutes and/or broadcasts.
-/// ======================================================
-///
-/// This path will be progressively deprecated and folded into the case above by
-/// using vector broadcast and transpose operations.
-///
-/// This path only emits a simple loop nest that performs clipped pointwise
-/// copies from a remote to a locally allocated memory.
-///
-/// Consider the case:
-///
-/// ```mlir
-/// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
-/// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
-/// %f0 = constant 0.0f : f32
-/// scf.for %i0 = 0 to %0 {
-/// scf.for %i1 = 0 to %1 step %c256 {
-/// scf.for %i2 = 0 to %2 step %c32 {
-/// %v = vector.transfer_read %A[%i0, %i1, %i2], %f0
-/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
-/// memref<?x?x?xf32>, vector<32x256xf32>
-/// }}}
-/// ```
-///
-/// The rewriters construct loops and indices that access MemRef A in a pattern
-/// resembling the following (while guaranteeing an always full-tile
-/// abstraction):
-///
-/// ```mlir
-/// scf.for %d2 = 0 to %c256 {
-/// scf.for %d1 = 0 to %c32 {
-/// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
-/// %tmp[%d2, %d1] = %s
-/// }
-/// }
-/// ```
-///
-/// In the current state, only a clipping transfer is implemented by `clip`,
-/// which creates individual indexing expressions of the form:
-///
-/// ```mlir-dsc
-/// auto condMax = i + ii < N;
-/// auto max = std_select(condMax, i + ii, N - one)
-/// auto cond = i + ii < zero;
-/// std_select(cond, zero, max);
-/// ```
-///
-/// In the future, clipping should not be the only way and instead we should
-/// load vectors + mask them. Similarly on the write side, load/mask/store for
-/// implementing RMW behavior.
-///
-/// Lowers TransferOp into a combination of:
-/// 1. local memory allocation;
-/// 2. perfect loop nest over:
-/// a. scalar load/stores from local buffers (viewed as a scalar memref);
-///      b. scalar store/load to original memref (with clipping).
-/// 3. vector_load/store
-/// 4. local memory deallocation.
-/// Minor variations occur depending on whether a TransferReadOp or
-/// a TransferWriteOp is rewritten.
-template <typename TransferOpTy>
-struct VectorTransferRewriter : public RewritePattern {
- explicit VectorTransferRewriter(VectorTransferToSCFOptions options,
- MLIRContext *context);
-
- /// Used for staging the transfer in a local buffer.
- MemRefType tmpMemRefType(TransferOpTy transfer) const;
-
- /// Performs the rewrite.
- LogicalResult matchAndRewrite(Operation *op,
- PatternRewriter &rewriter) const override;
-
- /// See description of `VectorTransferToSCFOptions`.
- VectorTransferToSCFOptions options;
-};
-
/// Collect a set of patterns to convert from the Vector dialect to SCF + std.
void populateVectorToSCFConversionPatterns(
RewritePatternSet &patterns,
diff --git a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
index 1e61aa924c3e9..2a7ee5ea8a58d 100644
--- a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
@@ -1,5 +1,4 @@
add_mlir_conversion_library(MLIRVectorToSCF
- ProgressiveVectorToSCF.cpp
VectorToSCF.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
deleted file mode 100644
index 981322899a2ee..0000000000000
--- a/mlir/lib/Conversion/VectorToSCF/ProgressiveVectorToSCF.cpp
+++ /dev/null
@@ -1,1142 +0,0 @@
-//===- ProgressiveVectorToSCF.h - Convert vector to SCF dialect -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements lowering of vector transfer operations to SCF.
-//
-//===----------------------------------------------------------------------===//
-
-#include <type_traits>
-
-#include "mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h"
-
-#include "../PassDetail.h"
-#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
-#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
-#include "mlir/Dialect/Vector/VectorOps.h"
-#include "mlir/Dialect/Vector/VectorUtils.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/Passes.h"
-
-using namespace mlir;
-using namespace mlir::edsc;
-using namespace mlir::edsc::intrinsics;
-using vector::TransferReadOp;
-using vector::TransferWriteOp;
-
-namespace {
-
-/// Attribute name used for labeling transfer ops during progressive lowering.
-static const char kPassLabel[] = "__vector_to_scf_lowering__";
-
-/// Lower to 1D transfer ops. Target-specific lowering will lower those.
-static const int64_t kTargetRank = 1;
-
-/// Given a MemRefType with VectorType element type, unpack one dimension from
-/// the VectorType into the MemRefType.
-///
-/// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
-static MemRefType unpackOneDim(MemRefType type) {
- auto vectorType = type.getElementType().dyn_cast<VectorType>();
- auto memrefShape = type.getShape();
- SmallVector<int64_t, 8> newMemrefShape;
- newMemrefShape.append(memrefShape.begin(), memrefShape.end());
- newMemrefShape.push_back(vectorType.getDimSize(0));
- return MemRefType::get(newMemrefShape,
- VectorType::get(vectorType.getShape().drop_front(),
- vectorType.getElementType()));
-}
-
-/// Helper data structure for data and mask buffers.
-struct BufferAllocs {
- Value dataBuffer;
- Value maskBuffer;
-};
-
-/// Allocate temporary buffers for data (vector) and mask (if present).
-/// TODO: Parallelism and threadlocal considerations.
-template <typename OpTy>
-static BufferAllocs allocBuffers(OpTy xferOp) {
- auto &b = ScopedContext::getBuilderRef();
- OpBuilder::InsertionGuard guard(b);
- Operation *scope =
- xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
- assert(scope && "Expected op to be inside automatic allocation scope");
- b.setInsertionPointToStart(&scope->getRegion(0).front());
-
- BufferAllocs result;
- auto bufferType = MemRefType::get({}, xferOp.getVectorType());
- result.dataBuffer = memref_alloca(bufferType).value;
-
- if (xferOp.mask()) {
- auto maskType = MemRefType::get({}, xferOp.mask().getType());
- Value maskBuffer = memref_alloca(maskType);
- memref_store(xferOp.mask(), maskBuffer);
- result.maskBuffer = memref_load(maskBuffer);
- }
-
- return result;
-}
-
-/// Given a vector transfer op, calculate which dimension of the `source`
-/// memref should be unpacked in the next application of TransferOpConversion.
-/// A return value of None indicates a broadcast.
-template <typename OpTy>
-static Optional<int64_t> unpackedDim(OpTy xferOp) {
- auto map = xferOp.permutation_map();
- if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
- return expr.getPosition();
- }
- assert(xferOp.isBroadcastDim(0) &&
- "Expected AffineDimExpr or AffineConstantExpr");
- return None;
-}
-
-/// Compute the permutation map for the new (N-1)-D vector transfer op. This
-/// map is identical to the current permutation map, but the first result is
-/// omitted.
-template <typename OpTy>
-static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
- auto map = xferOp.permutation_map();
- return AffineMap::get(
- map.getNumDims(), 0, map.getResults().drop_front(),
- builder.getContext());
-}
-
-/// Calculate the indices for the new vector transfer op.
-///
-/// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
-///       --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
-/// ^^^^^^
-/// `iv` is the iteration variable of the (new) surrounding loop.
-template <typename OpTy>
-static void getXferIndices(OpTy xferOp, Value iv,
- SmallVector<Value, 8> &indices) {
- typename OpTy::Adaptor adaptor(xferOp);
- // Corresponding memref dim of the vector dim that is unpacked.
- auto dim = unpackedDim(xferOp);
- auto prevIndices = adaptor.indices();
- indices.append(prevIndices.begin(), prevIndices.end());
-
- bool isBroadcast = !dim.hasValue();
- if (!isBroadcast) {
- using edsc::op::operator+;
- indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
- }
-}
-
-static void maybeYieldValue(
- bool hasRetVal, OpBuilder builder, Location loc, Value value) {
- if (hasRetVal) {
- builder.create<scf::YieldOp>(loc, value);
- } else {
- builder.create<scf::YieldOp>(loc);
- }
-}
-
-/// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
-/// is set to true. No such check is generated under following circumstances:
-/// * xferOp does not have a mask.
-/// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
-/// computed and attached to the new transfer op in the pattern.)
-/// * The to-be-unpacked dim of xferOp is a broadcast.
-template <typename OpTy>
-static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
- if (!xferOp.mask())
- return Value();
- if (xferOp.getMaskType().getRank() != 1)
- return Value();
- if (xferOp.isBroadcastDim(0))
- return Value();
-
- auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
- return vector_extract_element(xferOp.mask(), ivI32).value;
-}
-
-/// Helper function for TransferOpConversion and TransferOp1dConversion.
-/// Generate an in-bounds check if the transfer op may go out-of-bounds on the
-/// specified dimension `dim` with the loop iteration variable `iv`.
-/// E.g., when unpacking dimension 0 from:
-/// ```
-/// %vec = vector.transfer_read %A[%a, %b] %cst
-/// : vector<5x4xf32>, memref<?x?xf32>
-/// ```
-/// An if check similar to this will be generated inside the loop:
-/// ```
-/// %d = memref.dim %A, %c0 : memref<?x?xf32>
-/// if (%a + iv < %d) {
-/// (in-bounds case)
-/// } else {
-/// (out-of-bounds case)
-/// }
-/// ```
-///
-/// If the transfer is 1D and has a mask, this function generates a more complex
-/// check that also accounts for potentially masked-out elements.
-///
-/// This function variant returns the value returned by `inBoundsCase` or
-/// `outOfBoundsCase`. The MLIR type of the return value must be specified in
-/// `resultTypes`.
-template <typename OpTy>
-static Value generateInBoundsCheck(
- OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
- TypeRange resultTypes,
- function_ref<Value(OpBuilder &, Location)> inBoundsCase,
- function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
- bool hasRetVal = !resultTypes.empty();
- Value cond; // Condition to be built...
-
- // Condition check 1: Access in-bounds?
- bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
- if (!xferOp.isDimInBounds(0) && !isBroadcast) {
- auto memrefDim =
- memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
- using edsc::op::operator+;
- auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
- cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
- }
-
- // Condition check 2: Masked in?
- if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
- if (cond) {
- cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
- } else {
- cond = maskCond;
- }
- }
-
- // If the condition is non-empty, generate an SCF::IfOp.
- if (cond) {
- auto check = builder.create<scf::IfOp>(
- xferOp.getLoc(), resultTypes, cond,
- /*thenBuilder=*/[&](OpBuilder &builder, Location loc) {
- maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
- }, /*elseBuilder=*/[&](OpBuilder &builder, Location loc) {
- if (outOfBoundsCase) {
- maybeYieldValue(hasRetVal, builder, loc, outOfBoundsCase(builder, loc));
- } else {
- builder.create<scf::YieldOp>(loc);
- }
- });
-
- return hasRetVal ? check.getResult(0) : Value();
- }
-
- // Condition is empty, no need for an SCF::IfOp.
- return inBoundsCase(builder, xferOp.getLoc());
-}
-
-/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
-/// a return value. Consequently, this function does not have a return value.
-template <typename OpTy>
-static void generateInBoundsCheck(
- OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
- function_ref<void(OpBuilder &, Location)> inBoundsCase,
- function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
- generateInBoundsCheck(
- xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
- /*inBoundsCase=*/[&](OpBuilder &builder, Location loc) {
- inBoundsCase(builder, loc);
- return Value();
- },
- /*outOfBoundsCase=*/[&](OpBuilder &builder, Location loc) {
- if (outOfBoundsCase)
- outOfBoundsCase(builder, loc);
- return Value();
- });
-}
-
-/// Given an ArrayAttr, return a copy where the first element is dropped.
-static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
- if (!attr)
- return attr;
- return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
-}
-
-/// Add the pass label to a vector transfer op if its rank is not the target
-/// rank.
-template <typename OpTy>
-static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp) {
- if (newXferOp.getVectorType().getRank() > kTargetRank)
- newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
-}
-
-/// Given a transfer op, find the memref from which the mask is loaded. This
-/// is similar to Strategy<TransferWriteOp>::getBuffer.
-template <typename OpTy>
-static Value getMaskBuffer(OpTy xferOp) {
- assert(xferOp.mask() && "Expected that transfer op has mask");
- auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
- assert(loadOp && "Expected transfer op mask produced by LoadOp");
- return loadOp.getMemRef();
-}
-
-/// Codegen strategy, depending on the operation.
-template <typename OpTy>
-struct Strategy;
-
-/// Codegen strategy for vector TransferReadOp.
-template<>
-struct Strategy<TransferReadOp> {
- /// Find the StoreOp that is used for writing the current TransferReadOp's
- /// result to the temporary buffer allocation.
- static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
- assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
- auto storeOp = dyn_cast<memref::StoreOp>(
- (*xferOp->use_begin()).getOwner());
- assert(storeOp && "Expected TransferReadOp result used by StoreOp");
- return storeOp;
- }
-
- /// Find the temporary buffer allocation. All labeled TransferReadOps are
- /// used like this, where %buf is either the buffer allocation or a type cast
- /// of the buffer allocation:
- /// ```
- /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
- /// memref.store %vec, %buf[...] ...
- /// ```
- static Value getBuffer(TransferReadOp xferOp) {
- return getStoreOp(xferOp).getMemRef();
- }
-
- /// Retrieve the indices of the current StoreOp that stores into the buffer.
- static void getBufferIndices(TransferReadOp xferOp,
- SmallVector<Value, 8> &indices) {
- auto storeOp = getStoreOp(xferOp);
- auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
- indices.append(prevIndices.begin(), prevIndices.end());
- }
-
- /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
- /// accesses on the to-be-unpacked dimension.
- ///
- /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
- /// variable `iv`.
- /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
- ///
- /// E.g.:
- /// ```
- /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
- /// : memref<?x?x?xf32>, vector<4x3xf32>
- /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
- /// ```
- /// Is rewritten to:
- /// ```
- /// %casted = vector.type_cast %buf
- /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
- /// for %j = 0 to 4 {
- /// %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
- /// : memref<?x?x?xf32>, vector<3xf32>
- /// memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
- /// }
- /// ```
- ///
- /// Note: The loop and type cast are generated in TransferOpConversion.
- /// The original TransferReadOp and store op are deleted in `cleanup`.
- /// Note: The `mask` operand is set in TransferOpConversion.
- static TransferReadOp rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
- Value buffer, Value iv) {
- SmallVector<Value, 8> storeIndices;
- getBufferIndices(xferOp, storeIndices);
- storeIndices.push_back(iv);
-
- SmallVector<Value, 8> xferIndices;
- getXferIndices(xferOp, iv, xferIndices);
-
- auto bufferType = buffer.getType().dyn_cast<ShapedType>();
- auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
- auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
- auto newXfer = vector_transfer_read(
- vecType, xferOp.source(), xferIndices,
- AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
- xferOp.padding(), Value(), inBoundsAttr).value;
-
- maybeApplyPassLabel(builder,
- dyn_cast<TransferReadOp>(newXfer.getDefiningOp()));
-
- memref_store(newXfer, buffer, storeIndices);
- return newXfer.getDefiningOp<TransferReadOp>();
- }
-
- /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
- /// padding value to the temporary buffer.
- static void handleOutOfBoundsDim(
- OpBuilder &/*builder*/, TransferReadOp xferOp, Value buffer,
- Value iv) {
- SmallVector<Value, 8> storeIndices;
- getBufferIndices(xferOp, storeIndices);
- storeIndices.push_back(iv);
-
- auto bufferType = buffer.getType().dyn_cast<ShapedType>();
- auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
- auto vec = std_splat(vecType, xferOp.padding());
- memref_store(vec, buffer, storeIndices);
- }
-
- /// Cleanup after rewriting the op.
- static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
- rewriter.eraseOp(getStoreOp(xferOp));
- rewriter.eraseOp(xferOp);
- }
-};
-
-/// Codegen strategy for vector TransferWriteOp.
-template<>
-struct Strategy<TransferWriteOp> {
- /// Find the temporary buffer allocation. All labeled TransferWriteOps are
- /// used like this, where %buf is either the buffer allocation or a type cast
- /// of the buffer allocation:
- /// ```
- /// %vec = memref.load %buf[...] ...
- /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
- /// ```
- static Value getBuffer(TransferWriteOp xferOp) {
- auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
- assert(loadOp && "Expected transfer op vector produced by LoadOp");
- return loadOp.getMemRef();
- }
-
- /// Retrieve the indices of the current LoadOp that loads from the buffer.
- static void getBufferIndices(TransferWriteOp xferOp,
- SmallVector<Value, 8> &indices) {
- auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
- auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
- indices.append(prevIndices.begin(), prevIndices.end());
- }
-
- /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
- /// accesses on the to-be-unpacked dimension.
- ///
- /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
- /// using the loop iteration variable `iv`.
- /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
- /// to memory.
- ///
- /// Note: For more details, see comments on Strategy<TransferReadOp>.
- static TransferWriteOp rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
- Value buffer, Value iv) {
- SmallVector<Value, 8> loadIndices;
- getBufferIndices(xferOp, loadIndices);
- loadIndices.push_back(iv);
-
- SmallVector<Value, 8> xferIndices;
- getXferIndices(xferOp, iv, xferIndices);
-
- auto vec = memref_load(buffer, loadIndices);
- auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
- auto newXfer = vector_transfer_write(
- Type(), vec, xferOp.source(), xferIndices,
- AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
- Value(), inBoundsAttr);
-
- maybeApplyPassLabel(builder, newXfer.op);
-
- return newXfer;
- }
-
- /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
- static void handleOutOfBoundsDim(
- OpBuilder &builder, TransferWriteOp xferOp, Value buffer,
- Value iv) {}
-
- /// Cleanup after rewriting the op.
- static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
- rewriter.eraseOp(xferOp);
- }
-};
-
-template <typename OpTy>
-LogicalResult checkPrepareXferOp(OpTy xferOp) {
- if (xferOp->hasAttr(kPassLabel))
- return failure();
- if (xferOp.getVectorType().getRank() <= kTargetRank)
- return failure();
- return success();
-}
-
-/// Prepare a TransferReadOp for progressive lowering.
-///
-/// 1. Allocate a temporary buffer.
-/// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
-/// 3. Store the result of the TransferReadOp into the temporary buffer.
-/// 4. Load the result from the temporary buffer and replace all uses of the
-/// original TransferReadOp with this load.
-///
-/// E.g.:
-/// ```
-/// %vec = vector.transfer_read %A[%a, %b, %c], %cst
-/// : vector<5x4xf32>, memref<?x?x?xf32>
-/// ```
-/// is rewritten to:
-/// ```
-/// %0 = memref.alloca() : memref<vector<5x4xf32>>
-/// %1 = vector.transfer_read %A[%a, %b, %c], %cst
-/// { __vector_to_scf_lowering__ } : vector<5x4xf32>, memref<?x?x?xf32>
-/// memref.store %1, %0[] : memref<vector<5x4xf32>>
-/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
-/// ```
-///
-/// Note: A second temporary buffer may be allocated for the `mask` operand.
-struct PrepareTransferReadConversion
- : public OpRewritePattern<TransferReadOp> {
- using OpRewritePattern<TransferReadOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(TransferReadOp xferOp,
- PatternRewriter &rewriter) const override {
- if (checkPrepareXferOp(xferOp).failed())
- return failure();
-
- ScopedContext scope(rewriter, xferOp.getLoc());
- auto buffers = allocBuffers(xferOp);
- auto *newXfer = rewriter.clone(*xferOp.getOperation());
- newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
- if (xferOp.mask()) {
- dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(
- buffers.maskBuffer);
- }
-
- memref_store(newXfer->getResult(0), buffers.dataBuffer);
- rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
-
- return success();
- }
-};
-
-/// Prepare a TransferWriteOp for progressive lowering.
-///
-/// 1. Allocate a temporary buffer.
-/// 2. Store the vector into the buffer.
-/// 3. Load the vector from the buffer again.
-/// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
-/// marking it eligible for progressive lowering via TransferOpConversion.
-///
-/// E.g.:
-/// ```
-/// vector.transfer_write %vec, %A[%a, %b, %c]
-/// : vector<5x4xf32>, memref<?x?x?xf32>
-/// ```
-/// is rewritten to:
-/// ```
-/// %0 = memref.alloca() : memref<vector<5x4xf32>>
-/// memref.store %vec, %0[] : memref<vector<5x4xf32>>
-/// %1 = memref.load %0[] : memref<vector<5x4xf32>>
-/// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
-/// : vector<5x4xf32>, memref<?x?x?xf32>
-/// ```
-///
-/// Note: A second temporary buffer may be allocated for the `mask` operand.
-struct PrepareTransferWriteConversion
- : public OpRewritePattern<TransferWriteOp> {
- using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(TransferWriteOp xferOp,
- PatternRewriter &rewriter) const override {
- if (checkPrepareXferOp(xferOp).failed())
- return failure();
-
- ScopedContext scope(rewriter, xferOp.getLoc());
- auto buffers = allocBuffers(xferOp);
- memref_store(xferOp.vector(), buffers.dataBuffer);
- auto loadedVec = memref_load(buffers.dataBuffer);
- rewriter.updateRootInPlace(xferOp, [&]() {
- xferOp.vectorMutable().assign(loadedVec);
- xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
- });
-
- if (xferOp.mask()) {
- rewriter.updateRootInPlace(
- xferOp, [&]() { xferOp.maskMutable().assign(buffers.maskBuffer); });
- }
-
- return success();
- }
-};
-
-/// Progressive lowering of vector transfer ops: Unpack one dimension.
-///
-/// 1. Unpack one dimension from the current buffer type and cast the buffer
-/// to that new type. E.g.:
-/// ```
-/// %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
-/// vector.transfer_write %vec ...
-/// ```
-/// The following cast is generated:
-/// ```
-/// %casted = vector.type_cast %0
-/// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
-/// ```
-/// 2. Generate a for loop and rewrite the transfer op according to the
-/// corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
-/// out-of-bounds, generate an if-check and handle both cases separately.
-/// 3. Clean up according to the corresponding Strategy<OpTy>.
-template <typename OpTy>
-struct TransferOpConversion : public OpRewritePattern<OpTy> {
- using OpRewritePattern<OpTy>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(OpTy xferOp,
- PatternRewriter &rewriter) const override {
- if (!xferOp->hasAttr(kPassLabel))
- return failure();
-
- ScopedContext scope(rewriter, xferOp.getLoc());
-
- // Find and cast data buffer. How the buffer can be found depends on OpTy.
- auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
- auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
- auto castedDataType = unpackOneDim(dataBufferType);
- auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
-
- // If the xferOp has a mask: Find and cast mask buffer.
- Value castedMaskBuffer;
- if (xferOp.mask()) {
- auto maskBuffer = getMaskBuffer(xferOp);
- auto maskBufferType =
- maskBuffer.getType().template dyn_cast<MemRefType>();
- if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
- // Do not unpack a dimension of the mask, if:
- // * To-be-unpacked transfer op dimension is a broadcast.
- // * Mask is 1D, i.e., the mask cannot be further unpacked.
- // (That means that all remaining dimensions of the transfer op must
- // be broadcasted.)
- castedMaskBuffer = maskBuffer;
- } else {
- auto castedMaskType = unpackOneDim(maskBufferType);
- castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
- }
- }
-
- // Loop bounds and step.
- auto lb = std_constant_index(0).value;
- auto ub = std_constant_index(
- castedDataType.getDimSize(castedDataType.getRank() - 1))
- .value;
- auto step = std_constant_index(1).value;
-
- // Generate for loop.
- rewriter.create<scf::ForOp>(
- xferOp.getLoc(), lb, ub, step, ValueRange(),
- [&](OpBuilder &b, Location loc, Value iv,
- ValueRange /*loopState*/) {
- ScopedContext scope(b, loc);
- generateInBoundsCheck(
- xferOp, iv, b, unpackedDim(xferOp),
- /*inBoundsCase=*/
- [&](OpBuilder &b, Location /*loc*/) {
- // Create new transfer op.
- OpTy newXfer =
- Strategy<OpTy>::rewriteOp(b, xferOp, castedDataBuffer, iv);
-
- // If old transfer op has a mask: Set mask on new transfer op.
- // Special case: If the mask of the old transfer op is 1D and the
- // unpacked dim is not a broadcast, no mask is needed
- // on the new transfer op.
- if (xferOp.mask() && (xferOp.isBroadcastDim(0) ||
- xferOp.getMaskType().getRank() > 1)) {
- OpBuilder::InsertionGuard guard(b);
- b.setInsertionPoint(newXfer); // Insert load before newXfer.
-
- SmallVector<Value, 8> loadIndices;
- Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
- // In case of broadcast: Use same indices to load from memref as
- // before.
- if (!xferOp.isBroadcastDim(0))
- loadIndices.push_back(iv);
-
- auto mask = memref_load(castedMaskBuffer, loadIndices);
- rewriter.updateRootInPlace(
- newXfer, [&]() { newXfer.maskMutable().assign(mask); });
- }
- },
- /*outOfBoundsCase=*/
- [&](OpBuilder &b, Location /*loc*/) {
- Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp, castedDataBuffer,
- iv);
- });
- b.create<scf::YieldOp>(loc);
- });
-
- Strategy<OpTy>::cleanup(rewriter, xferOp);
- return success();
- }
-};
-
-/// If the original transfer op has a mask, compute the mask of the new transfer
-/// op (for the current iteration `i`) and assign it.
-template <typename OpTy>
-static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
- int64_t i) {
- if (!xferOp.mask())
- return;
-
- if (xferOp.isBroadcastDim(0)) {
- // To-be-unpacked dimension is a broadcast, which does not have a
- // corresponding mask dimension. Mask attribute remains unchanged.
- newXferOp.maskMutable().assign(xferOp.mask());
- return;
- }
-
- if (xferOp.getMaskType().getRank() > 1) {
- // Unpack one dimension of the mask.
- OpBuilder::InsertionGuard guard(builder);
- builder.setInsertionPoint(newXferOp); // Insert load before newXfer.
-
- llvm::SmallVector<int64_t, 1> indices({i});
- auto newMask = vector_extract(xferOp.mask(), indices).value;
- newXferOp.maskMutable().assign(newMask);
- }
-
- // If we end up here: The mask of the old transfer op is 1D and the unpacked
- // dim is not a broadcast, so no mask is needed on the new transfer op.
- // `generateInBoundsCheck` will have evaluated the mask already.
-}
-
-/// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
-/// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
-/// memref buffer is allocated and the SCF loop is fully unrolled.
-///
-///
-/// E.g.:
-/// ```
-/// %vec = vector.transfer_read %A[%a, %b, %c], %padding
-/// : memref<?x?x?xf32>, vector<5x4xf32>
-/// ```
-/// is rewritten to IR such as (simplified):
-/// ```
-/// %v_init = splat %padding : vector<5x4xf32>
-/// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
-/// : memref<?x?x?xf32>, vector<4xf32>
-/// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
-/// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
-/// : memref<?x?x?xf32>, vector<4xf32>
-/// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
-/// ...
-/// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
-/// : memref<?x?x?xf32>, vector<4xf32>
-/// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
-/// ```
-///
-/// Note: As an optimization, if the result of the original TransferReadOp
-/// was directly inserted into another vector, no new %v_init vector is created.
-/// Instead, the new TransferReadOp results are inserted into that vector.
-struct UnrollTransferReadConversion : public OpRewritePattern<TransferReadOp> {
- using OpRewritePattern<TransferReadOp>::OpRewritePattern;
-
- /// Return the vector into which the newly created TransferReadOp results
- /// are inserted.
- Value getResultVector(TransferReadOp xferOp,
- PatternRewriter &rewriter) const {
- if (auto insertOp = getInsertOp(xferOp))
- return insertOp.dest();
- return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
- }
-
- /// If the result of the TransferReadOp has exactly one user, which is a
- /// vector::InsertOp, return that operation.
- vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
- if (xferOp->hasOneUse()) {
- Operation *xferOpUser = *xferOp->getUsers().begin();
- if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
- return insertOp;
- }
-
- return vector::InsertOp();
- }
-
- /// If the result of the TransferReadOp has exactly one user, which is a
- /// vector::InsertOp, return that operation's indices.
- void getInsertionIndices(TransferReadOp xferOp,
- SmallVector<int64_t, 8> &indices) const {
- if (auto insertOp = getInsertOp(xferOp)) {
- llvm::for_each(insertOp.position(), [&](Attribute attr) {
- indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
- });
- }
- }
-
- /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
- /// accesses, and broadcasts and transposes in permutation maps.
- LogicalResult matchAndRewrite(TransferReadOp xferOp,
- PatternRewriter &rewriter) const override {
- if (xferOp.getVectorType().getRank() <= kTargetRank)
- return failure();
-
- ScopedContext scope(rewriter, xferOp.getLoc());
- auto insertOp = getInsertOp(xferOp);
- auto vec = getResultVector(xferOp, rewriter);
- auto vecType = vec.getType().dyn_cast<VectorType>();
- auto xferVecType = xferOp.getVectorType();
- auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
- xferVecType.getElementType());
- int64_t dimSize = xferVecType.getShape()[0];
-
- // Generate fully unrolled loop of transfer ops.
- for (int64_t i = 0; i < dimSize; ++i) {
- Value iv = std_constant_index(i);
-
- vec = generateInBoundsCheck(
- xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
- /*inBoundsCase=*/
- [&](OpBuilder &b, Location loc) {
- ScopedContext scope(b, loc);
-
- // Indices for the new transfer op.
- SmallVector<Value, 8> xferIndices;
- getXferIndices(xferOp, iv, xferIndices);
-
- // Indices for the new vector.insert op.
- SmallVector<int64_t, 8> insertionIndices;
- getInsertionIndices(xferOp, insertionIndices);
- insertionIndices.push_back(i);
-
- auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
- auto newXferOpVal =
- vector_transfer_read(
- newXferVecType, xferOp.source(), xferIndices,
- AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
- xferOp.padding(), Value(), inBoundsAttr)
- .value;
- auto newXferOp =
- dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());
-
- maybeAssignMask(b, xferOp, newXferOp, i);
-
- return vector_insert(newXferOp, vec, insertionIndices).value;
- },
- /*outOfBoundsCase=*/
- [&](OpBuilder &b, Location loc) {
- // Loop through original (unmodified) vector.
- return vec;
- });
- }
-
- if (insertOp) {
- // Rewrite single user of the old TransferReadOp, which was an InsertOp.
- rewriter.replaceOp(insertOp, vec);
- rewriter.eraseOp(xferOp);
- } else {
- rewriter.replaceOp(xferOp, vec);
- }
-
- return success();
- }
-};
-
-/// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
-/// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
-/// memref buffer is allocated and the SCF loop is fully unrolled.
-///
-///
-/// E.g.:
-/// ```
-/// vector.transfer_write %vec, %A[%a, %b, %c]
-/// : vector<5x4xf32>, memref<?x?x?xf32>
-/// ```
-/// is rewritten to IR such as (simplified):
-/// ```
-/// %v0 = vector.extract %vec[0] : vector<5x4xf32>
-/// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
-/// %v1 = vector.extract %vec[1] : vector<5x4xf32>
-/// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
-/// ...
-/// %v4 = vector.extract %vec[4] : vector<5x4xf32>
-/// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
-/// ```
-///
-/// Note: As an optimization, if the vector of the original TransferWriteOp
-/// was directly extracted from another vector via an ExtractOp `a`, extract
-/// the vectors for the newly generated TransferWriteOps from `a`'s input. By
-/// doing so, `a` may become dead, and the number of ExtractOps generated during
-/// recursive application of this pattern will be minimal.
-struct UnrollTransferWriteConversion
- : public OpRewritePattern<TransferWriteOp> {
- using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
-
-  /// Return the vector from which newly generated ExtractOps will extract.
- Value getDataVector(TransferWriteOp xferOp) const {
- if (auto extractOp = getExtractOp(xferOp))
- return extractOp.vector();
- return xferOp.vector();
- }
-
- /// If the input of the given TransferWriteOp is an ExtractOp, return it.
- vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
- if (auto *op = xferOp.vector().getDefiningOp())
- return dyn_cast<vector::ExtractOp>(op);
- return vector::ExtractOp();
- }
-
- /// If the input of the given TransferWriteOp is an ExtractOp, return its
- /// indices.
- void getExtractionIndices(TransferWriteOp xferOp,
- SmallVector<int64_t, 8> &indices) const {
- if (auto extractOp = getExtractOp(xferOp)) {
- llvm::for_each(extractOp.position(), [&](Attribute attr) {
- indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
- });
- }
- }
-
- /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
- /// accesses, and broadcasts and transposes in permutation maps.
- LogicalResult matchAndRewrite(TransferWriteOp xferOp,
- PatternRewriter &rewriter) const override {
- if (xferOp.getVectorType().getRank() <= kTargetRank)
- return failure();
-
- ScopedContext scope(rewriter, xferOp.getLoc());
- auto vec = getDataVector(xferOp);
- auto xferVecType = xferOp.getVectorType();
- int64_t dimSize = xferVecType.getShape()[0];
-
- // Generate fully unrolled loop of transfer ops.
- for (int64_t i = 0; i < dimSize; ++i) {
- Value iv = std_constant_index(i);
-
- generateInBoundsCheck(
- xferOp, iv, rewriter, unpackedDim(xferOp),
- /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
- ScopedContext scope(b, loc);
-
- // Indices for the new transfer op.
- SmallVector<Value, 8> xferIndices;
- getXferIndices(xferOp, iv, xferIndices);
-
- // Indices for the new vector.extract op.
- SmallVector<int64_t, 8> extractionIndices;
- getExtractionIndices(xferOp, extractionIndices);
- extractionIndices.push_back(i);
-
- auto extracted = vector_extract(vec, extractionIndices).value;
- auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
-
- auto newXferOp =
- vector_transfer_write(
- Type(), extracted, xferOp.source(), xferIndices,
- AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
- Value(), inBoundsAttr)
- .op;
-
- maybeAssignMask(b, xferOp, newXferOp, i);
- });
- }
-
- rewriter.eraseOp(xferOp);
- return success();
- }
-};
-
-/// Compute the indices into the memref for the LoadOp/StoreOp generated as
-/// part of TransferOp1dConversion. Return the memref dimension on which
-/// the transfer is operating. A return value of None indicates a broadcast.
-template <typename OpTy>
-static Optional<int64_t> get1dMemrefIndices(
- OpTy xferOp, Value iv, SmallVector<Value, 8> &memrefIndices) {
- auto indices = xferOp.indices();
- auto map = xferOp.permutation_map();
-
- memrefIndices.append(indices.begin(), indices.end());
- assert(map.getNumResults() == 1 &&
- "Expected 1 permutation map result for 1D transfer");
- if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
- auto dim = expr.getPosition();
- using edsc::op::operator+;
- memrefIndices[dim] = memrefIndices[dim] + iv;
- return dim;
- }
-
- assert(xferOp.isBroadcastDim(0) &&
- "Expected AffineDimExpr or AffineConstantExpr");
- return None;
-}
-
-/// Codegen strategy for TransferOp1dConversion, depending on the
-/// operation.
-template <typename OpTy>
-struct Strategy1d;
-
-/// Codegen strategy for TransferReadOp.
-template <>
-struct Strategy1d<TransferReadOp> {
- static void generateForLoopBody(
- OpBuilder &builder, Location loc, TransferReadOp xferOp, Value iv,
- ValueRange loopState) {
- SmallVector<Value, 8> indices;
- auto dim = get1dMemrefIndices(xferOp, iv, indices);
- auto ivI32 = std_index_cast(
- IntegerType::get(builder.getContext(), 32), iv);
- auto vec = loopState[0];
-
- // In case of out-of-bounds access, leave `vec` as is (was initialized with
- // padding value).
- auto nextVec = generateInBoundsCheck(
- xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
- /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
- auto val = memref_load(xferOp.source(), indices);
- return vector_insert_element(val, vec, ivI32.value).value;
- }, /*outOfBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
- return vec;
- });
- builder.create<scf::YieldOp>(loc, nextVec);
- }
-
- static Value initialLoopState(TransferReadOp xferOp) {
-    // Initialize vector with padding value.
- return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
- }
-};
-
-/// Codegen strategy for TransferWriteOp.
-template <>
-struct Strategy1d<TransferWriteOp> {
- static void generateForLoopBody(
- OpBuilder &builder, Location loc, TransferWriteOp xferOp, Value iv,
- ValueRange /*loopState*/) {
- SmallVector<Value, 8> indices;
- auto dim = get1dMemrefIndices(xferOp, iv, indices);
- auto ivI32 = std_index_cast(
- IntegerType::get(builder.getContext(), 32), iv);
-
- // Nothing to do in case of out-of-bounds access.
- generateInBoundsCheck(
- xferOp, iv, builder, dim,
- /*inBoundsCase=*/[&](OpBuilder& /*b*/, Location loc) {
- auto val = vector_extract_element(xferOp.vector(), ivI32.value);
- memref_store(val, xferOp.source(), indices);
- });
- builder.create<scf::YieldOp>(loc);
- }
-
- static Value initialLoopState(TransferWriteOp xferOp) {
- return Value();
- }
-};
-
-/// Return true if the last dimension of the MemRefType has unit stride.
-static bool isLastMemrefDimUnitStride(MemRefType type) {
- int64_t offset;
- SmallVector<int64_t, 4> strides;
- auto successStrides = getStridesAndOffset(type, strides, offset);
- return succeeded(successStrides) && strides.back() == 1;
-}
-
-/// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
-/// necessary in cases where a 1D vector transfer op cannot be lowered into
-/// vector load/stores due to non-unit strides or broadcasts:
-///
-/// * Transfer dimension is not the last memref dimension
-/// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
-/// * Memref has a layout map with non-unit stride on the last dimension
-///
-/// This pattern generates IR as follows:
-///
-/// 1. Generate a for loop iterating over each vector element.
-/// 2. Inside the loop, generate a InsertElementOp or ExtractElementOp,
-/// depending on OpTy.
-///
-/// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
-/// can be generated instead of TransferOp1dConversion. Add such a pattern
-/// to ConvertVectorToLLVM.
-///
-/// E.g.:
-/// ```
-/// vector.transfer_write %vec, %A[%a, %b]
-/// {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
-/// : vector<9xf32>, memref<?x?xf32>
-/// ```
-/// Is rewritten to approximately the following pseudo-IR:
-/// ```
-/// for i = 0 to 9 {
-/// %t = vector.extractelement %vec[i] : vector<9xf32>
-/// memref.store %t, %arg0[%a + i, %b] : memref<?x?xf32>
-/// }
-/// ```
-template <typename OpTy>
-struct TransferOp1dConversion : public OpRewritePattern<OpTy> {
- using OpRewritePattern<OpTy>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(OpTy xferOp,
- PatternRewriter &rewriter) const override {
- ScopedContext scope(rewriter, xferOp.getLoc());
- auto map = xferOp.permutation_map();
- auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
-
- if (!memRefType)
- return failure();
- if (xferOp.getVectorType().getRank() != 1)
- return failure();
- if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
- return failure(); // Handled by ConvertVectorToLLVM
-
- // Loop bounds, step, state...
- auto vecType = xferOp.getVectorType();
- auto lb = std_constant_index(0);
- auto ub = std_constant_index(vecType.getDimSize(0));
- auto step = std_constant_index(1);
- auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);
-
- // Generate for loop.
- rewriter.replaceOpWithNewOp<scf::ForOp>(
- xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
- [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
- ScopedContext nestedScope(builder, loc);
- Strategy1d<OpTy>::generateForLoopBody(
- builder, loc, xferOp, iv, loopState);
- });
-
- return success();
- }
-};
-
-} // namespace
-
-namespace mlir {
-
-void populateProgressiveVectorToSCFConversionPatterns(
- RewritePatternSet &patterns,
- const ProgressiveVectorTransferToSCFOptions &options) {
- if (options.unroll) {
- patterns.add<UnrollTransferReadConversion, UnrollTransferWriteConversion>(
- patterns.getContext());
- } else {
- patterns.add<PrepareTransferReadConversion, PrepareTransferWriteConversion,
- TransferOpConversion<TransferReadOp>,
- TransferOpConversion<TransferWriteOp>>(patterns.getContext());
- }
-
- if (kTargetRank == 1) {
- patterns.add<TransferOp1dConversion<TransferReadOp>,
- TransferOp1dConversion<TransferWriteOp>>(
- patterns.getContext());
- }
-}
-
-struct ConvertProgressiveVectorToSCFPass
- : public ConvertVectorToSCFBase<ConvertProgressiveVectorToSCFPass> {
- ConvertProgressiveVectorToSCFPass(
- const ProgressiveVectorTransferToSCFOptions &opt)
- : options(opt) {}
-
- void runOnFunction() override {
- RewritePatternSet patterns(getFunction().getContext());
- populateProgressiveVectorToSCFConversionPatterns(patterns, options);
- (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns));
- }
-
- ProgressiveVectorTransferToSCFOptions options;
-};
-
-} // namespace mlir
-
-std::unique_ptr<Pass> mlir::createProgressiveConvertVectorToSCFPass(
- const ProgressiveVectorTransferToSCFOptions &options) {
- return std::make_unique<ConvertProgressiveVectorToSCFPass>(options);
-}
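Before the VectorToSCF.cpp diff: the per-iteration guard built by generateInBoundsCheck (removed above and re-added below) reduces to a small scalar predicate. The following is a plain-C++ restatement for exposition only; the names are made up and are not MLIR API:

```cpp
// Scalar model of the condition guarding each unpacked iteration `iv`:
//   check 1: baseIndex + iv < memrefDim   (skipped for broadcasts or dims
//                                          already marked in_bounds)
//   check 2: mask[iv]                     (only for 1-D masks; higher-rank
//                                          masks are sliced and attached to
//                                          the new transfer op instead)
// The generated scf.if takes the AND of whichever checks are present.
#include <cstdint>
#include <cstdio>
#include <vector>

static bool iterationEnabled(bool hasDim, bool dimInBounds, int64_t baseIndex,
                             int64_t iv, int64_t memrefDim,
                             const std::vector<bool> *mask1d) {
  bool cond = true;
  if (hasDim && !dimInBounds)
    cond = baseIndex + iv < memrefDim;   // check 1: access in bounds?
  if (mask1d && hasDim)
    cond = cond && (*mask1d)[iv];        // check 2: element masked in?
  return cond;
}

int main() {
  // Unpacking dim 0 of a transfer starting at index 1 into a memref dim of 3.
  std::vector<bool> mask = {true, false, true, true};
  for (int64_t iv = 0; iv < 4; ++iv)
    std::printf("iv=%lld enabled=%d\n", (long long)iv,
                iterationEnabled(/*hasDim=*/true, /*dimInBounds=*/false,
                                 /*baseIndex=*/1, iv, /*memrefDim=*/3, &mask));
  return 0;
}
```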
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 4f13e7d8e5af5..5b5769c9ad066 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -1,4 +1,4 @@
-//===- VectorToSCF.cpp - Conversion from Vector to mix of SCF and Std -----===//
+//===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements target-dependent lowering of vector transfer operations.
+// This file implements lowering of vector transfer operations to SCF.
//
//===----------------------------------------------------------------------===//
@@ -17,16 +17,12 @@
#include "../PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
-#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
@@ -37,672 +33,1091 @@ using namespace mlir::edsc::intrinsics;
using vector::TransferReadOp;
using vector::TransferWriteOp;
-// Return a list of Values that correspond to multiple AffineApplyOp, one for
-// each result of `map`. Each `expr` in `map` is canonicalized and folded
-// greedily according to its operands.
-// TODO: factor out in a common location that both linalg and vector can use.
-static SmallVector<Value, 4>
-applyMapToValues(OpBuilder &b, Location loc, AffineMap map, ValueRange values) {
- SmallVector<Value, 4> res;
- res.reserve(map.getNumResults());
- unsigned numDims = map.getNumDims(), numSym = map.getNumSymbols();
- // For each `expr` in `map`, applies the `expr` to the values extracted from
- // ranges. If the resulting application can be folded into a Value, the
- // folding occurs eagerly. Otherwise, an affine.apply operation is emitted.
- for (auto expr : map.getResults()) {
- AffineMap map = AffineMap::get(numDims, numSym, expr);
- SmallVector<Value, 4> operands(values.begin(), values.end());
- fullyComposeAffineMapAndOperands(&map, &operands);
- canonicalizeMapAndOperands(&map, &operands);
- res.push_back(b.createOrFold<AffineApplyOp>(loc, map, operands));
- }
- return res;
+namespace {
+
+/// Attribute name used for labeling transfer ops during progressive lowering.
+static const char kPassLabel[] = "__vector_to_scf_lowering__";
+
+/// Lower to 1D transfer ops. Target-specific lowering will lower those.
+static const int64_t kTargetRank = 1;
+
+/// Given a MemRefType with VectorType element type, unpack one dimension from
+/// the VectorType into the MemRefType.
+///
+/// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
+static MemRefType unpackOneDim(MemRefType type) {
+ auto vectorType = type.getElementType().dyn_cast<VectorType>();
+ auto memrefShape = type.getShape();
+ SmallVector<int64_t, 8> newMemrefShape;
+ newMemrefShape.append(memrefShape.begin(), memrefShape.end());
+ newMemrefShape.push_back(vectorType.getDimSize(0));
+ return MemRefType::get(newMemrefShape,
+ VectorType::get(vectorType.getShape().drop_front(),
+ vectorType.getElementType()));
}
-namespace {
-/// Helper class captures the common information needed to lower N>1-D vector
-/// transfer operations (read and write).
-/// On construction, this class opens an edsc::ScopedContext for simpler IR
-/// manipulation.
-/// In pseudo-IR, for an n-D vector_transfer_read such as:
+/// Helper data structure for data and mask buffers.
+struct BufferAllocs {
+ Value dataBuffer;
+ Value maskBuffer;
+};
+
+/// Allocate temporary buffers for data (vector) and mask (if present).
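+/// E.g., for a masked transfer op with vector type vector<5x4xf32>, the
+/// generated IR resembles the following sketch (SSA names and the i1 mask
+/// shape are illustrative only):
+/// ```
+/// %data_buf = memref.alloca() : memref<vector<5x4xf32>>
+/// %mask_buf = memref.alloca() : memref<vector<5x4xi1>>
+/// memref.store %mask, %mask_buf[] : memref<vector<5x4xi1>>
+/// %mask_val = memref.load %mask_buf[] : memref<vector<5x4xi1>>
+/// ```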
+/// TODO: Parallelism and threadlocal considerations.
+template <typename OpTy>
+static BufferAllocs allocBuffers(OpTy xferOp) {
+ auto &b = ScopedContext::getBuilderRef();
+ OpBuilder::InsertionGuard guard(b);
+ Operation *scope =
+ xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+ assert(scope && "Expected op to be inside automatic allocation scope");
+ b.setInsertionPointToStart(&scope->getRegion(0).front());
+
+ BufferAllocs result;
+ auto bufferType = MemRefType::get({}, xferOp.getVectorType());
+ result.dataBuffer = memref_alloca(bufferType).value;
+
+ if (xferOp.mask()) {
+ auto maskType = MemRefType::get({}, xferOp.mask().getType());
+ Value maskBuffer = memref_alloca(maskType);
+ memref_store(xferOp.mask(), maskBuffer);
+ result.maskBuffer = memref_load(maskBuffer);
+ }
+
+ return result;
+}
+
+/// Given a vector transfer op, calculate which dimension of the `source`
+/// memref should be unpacked in the next application of TransferOpConversion.
+/// A return value of None indicates a broadcast.
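+/// For illustration (maps are examples only):
+/// ```
+/// (d0, d1, d2) -> (d1, d2)   // first result is d1  => unpack memref dim 1
+/// (d0, d1)     -> (0, d1)    // first result is 0   => broadcast, return None
+/// ```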
+template <typename OpTy>
+static Optional<int64_t> unpackedDim(OpTy xferOp) {
+ auto map = xferOp.permutation_map();
+ if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
+ return expr.getPosition();
+ }
+ assert(xferOp.isBroadcastDim(0) &&
+ "Expected AffineDimExpr or AffineConstantExpr");
+ return None;
+}
+
+/// Compute the permutation map for the new (N-1)-D vector transfer op. This
+/// map is identical to the current permutation map, but the first result is
+/// omitted.
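+/// E.g. (illustrative only):
+/// ```
+/// (d0, d1, d2) -> (d1, d2)  becomes  (d0, d1, d2) -> (d2)
+/// ```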
+template <typename OpTy>
+static AffineMap unpackedPermutationMap(OpTy xferOp, OpBuilder &builder) {
+ auto map = xferOp.permutation_map();
+ return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
+ builder.getContext());
+}
+
+/// Calculate the indices for the new vector transfer op.
///
+/// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
+/// --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
+/// ^^^^^^
+/// `iv` is the iteration variable of the (new) surrounding loop.
+template <typename OpTy>
+static void getXferIndices(OpTy xferOp, Value iv,
+ SmallVector<Value, 8> &indices) {
+ typename OpTy::Adaptor adaptor(xferOp);
+ // Corresponding memref dim of the vector dim that is unpacked.
+ auto dim = unpackedDim(xferOp);
+ auto prevIndices = adaptor.indices();
+ indices.append(prevIndices.begin(), prevIndices.end());
+
+ bool isBroadcast = !dim.hasValue();
+ if (!isBroadcast) {
+ using edsc::op::operator+;
+ indices[dim.getValue()] = adaptor.indices()[dim.getValue()] + iv;
+ }
+}
+
+static void maybeYieldValue(bool hasRetVal, OpBuilder builder, Location loc,
+ Value value) {
+ if (hasRetVal) {
+ builder.create<scf::YieldOp>(loc, value);
+ } else {
+ builder.create<scf::YieldOp>(loc);
+ }
+}
+
+/// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
+/// is set to true. No such check is generated under the following circumstances:
+/// * xferOp does not have a mask.
+/// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
+/// computed and attached to the new transfer op in the pattern.)
+/// * The to-be-unpacked dim of xferOp is a broadcast.
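+///
+/// When the check is generated, the IR resembles the following sketch (SSA
+/// names and the mask type are illustrative only):
+/// ```
+/// %pos = index_cast %iv : index to i32
+/// %in  = vector.extractelement %mask[%pos : i32] : vector<9xi1>
+/// ```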
+template <typename OpTy>
+static Value generateMaskCheck(OpBuilder &builder, OpTy xferOp, Value iv) {
+ if (!xferOp.mask())
+ return Value();
+ if (xferOp.getMaskType().getRank() != 1)
+ return Value();
+ if (xferOp.isBroadcastDim(0))
+ return Value();
+
+ auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+ return vector_extract_element(xferOp.mask(), ivI32).value;
+}
+
+/// Helper function for TransferOpConversion and TransferOp1dConversion.
+/// Generate an in-bounds check if the transfer op may go out-of-bounds on the
+/// specified dimension `dim` with the loop iteration variable `iv`.
+/// E.g., when unpacking dimension 0 from:
/// ```
-/// vector_transfer_read(%m, %offsets, identity_map, %fill) :
-/// memref<(leading_dims) x (major_dims) x (minor_dims) x type>,
-/// vector<(major_dims) x (minor_dims) x type>
+/// %vec = vector.transfer_read %A[%a, %b], %cst
+///     : memref<?x?xf32>, vector<5x4xf32>
/// ```
-///
-/// where rank(minor_dims) is the lower-level vector rank (e.g. 1 for LLVM or
-/// higher).
-///
-/// This is the entry point to emitting pseudo-IR resembling:
-///
+/// An if check similar to this will be generated inside the loop:
/// ```
-/// %tmp = alloc(): memref<(major_dims) x vector<minor_dim x type>>
-/// for (%ivs_major, {0}, {vector_shape}, {1}) { // (N-1)-D loop nest
-/// if (any_of(%ivs_major + %offsets, <, major_dims)) {
-/// %v = vector_transfer_read(
-/// {%offsets_leading, %ivs_major + %offsets_major, %offsets_minor},
-/// %ivs_minor):
-/// memref<(leading_dims) x (major_dims) x (minor_dims) x type>,
-/// vector<(minor_dims) x type>;
-/// store(%v, %tmp);
-/// } else {
-/// %v = splat(vector<(minor_dims) x type>, %fill)
-/// store(%v, %tmp, %ivs_major);
-/// }
-/// }
-/// %res = load(%tmp, %0): memref<(major_dims) x vector<minor_dim x type>>):
-// vector<(major_dims) x (minor_dims) x type>
+/// %d = memref.dim %A, %c0 : memref<?x?xf32>
+/// if (%a + iv < %d) {
+/// (in-bounds case)
+/// } else {
+/// (out-of-bounds case)
+/// }
/// ```
///
-template <typename ConcreteOp>
-class NDTransferOpHelper {
-public:
- NDTransferOpHelper(PatternRewriter &rewriter, ConcreteOp xferOp,
- const VectorTransferToSCFOptions &options)
- : rewriter(rewriter), options(options), loc(xferOp.getLoc()),
- scope(std::make_unique<ScopedContext>(rewriter, loc)), xferOp(xferOp),
- op(xferOp.getOperation()) {
- vectorType = xferOp.getVectorType();
- // TODO: when we go to k > 1-D vectors adapt minorRank.
- minorRank = 1;
- majorRank = vectorType.getRank() - minorRank;
- leadingRank = xferOp.getLeadingShapedRank();
- majorVectorType =
- VectorType::get(vectorType.getShape().take_front(majorRank),
- vectorType.getElementType());
- minorVectorType =
- VectorType::get(vectorType.getShape().take_back(minorRank),
- vectorType.getElementType());
- /// Memref of minor vector type is used for individual transfers.
- memRefMinorVectorType = MemRefType::get(
- majorVectorType.getShape(), minorVectorType, {},
- xferOp.getShapedType().template cast<MemRefType>().getMemorySpace());
- }
-
- LogicalResult doReplace();
-
-private:
- /// Creates the loop nest on the "major" dimensions and calls the
- /// `loopBodyBuilder` lambda in the context of the loop nest.
- void
- emitLoops(llvm::function_ref<void(ValueRange, ValueRange, ValueRange,
- ValueRange, const MemRefBoundsCapture &)>
- loopBodyBuilder);
-
- /// Common state to lower vector transfer ops.
- PatternRewriter &rewriter;
- const VectorTransferToSCFOptions &options;
- Location loc;
- std::unique_ptr<ScopedContext> scope;
- ConcreteOp xferOp;
- Operation *op;
- // A vector transfer copies data between:
- // - memref<(leading_dims) x (major_dims) x (minor_dims) x type>
- // - vector<(major_dims) x (minor_dims) x type>
- unsigned minorRank; // for now always 1
- unsigned majorRank; // vector rank - minorRank
- unsigned leadingRank; // memref rank - vector rank
- VectorType vectorType; // vector<(major_dims) x (minor_dims) x type>
- VectorType majorVectorType; // vector<(major_dims) x type>
- VectorType minorVectorType; // vector<(minor_dims) x type>
- MemRefType memRefMinorVectorType; // memref<vector<(minor_dims) x type>>
-};
+/// If the transfer is 1D and has a mask, this function generates a more complex
+/// check that also accounts for potentially masked-out elements.
+///
+/// This function variant returns the value returned by `inBoundsCase` or
+/// `outOfBoundsCase`. The MLIR type of the return value must be specified in
+/// `resultTypes`.
+template <typename OpTy>
+static Value generateInBoundsCheck(
+ OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
+ TypeRange resultTypes,
+ function_ref<Value(OpBuilder &, Location)> inBoundsCase,
+ function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
+ bool hasRetVal = !resultTypes.empty();
+ Value cond; // Condition to be built...
-template <typename ConcreteOp>
-void NDTransferOpHelper<ConcreteOp>::emitLoops(
- llvm::function_ref<void(ValueRange, ValueRange, ValueRange, ValueRange,
- const MemRefBoundsCapture &)>
- loopBodyBuilder) {
- /// Loop nest operates on the major dimensions
- MemRefBoundsCapture memrefBoundsCapture(xferOp.source());
+ // Condition check 1: Access in-bounds?
+ bool isBroadcast = !dim.hasValue(); // No in-bounds check for broadcasts.
+ if (!xferOp.isDimInBounds(0) && !isBroadcast) {
+ auto memrefDim =
+ memref_dim(xferOp.source(), std_constant_index(dim.getValue()));
+ using edsc::op::operator+;
+ auto memrefIdx = xferOp.indices()[dim.getValue()] + iv;
+ cond = std_cmpi_sgt(memrefDim.value, memrefIdx);
+ }
- if (options.unroll) {
- auto shape = majorVectorType.getShape();
- auto strides = computeStrides(shape);
- unsigned numUnrolledInstances = computeMaxLinearIndex(shape);
- ValueRange indices(xferOp.indices());
- for (unsigned idx = 0; idx < numUnrolledInstances; ++idx) {
- SmallVector<int64_t, 4> offsets = delinearize(strides, idx);
- SmallVector<Value, 4> offsetValues =
- llvm::to_vector<4>(llvm::map_range(offsets, [](int64_t off) -> Value {
- return std_constant_index(off);
- }));
- loopBodyBuilder(offsetValues, indices.take_front(leadingRank),
- indices.drop_front(leadingRank).take_front(majorRank),
- indices.take_back(minorRank), memrefBoundsCapture);
+ // Condition check 2: Masked in?
+ if (auto maskCond = generateMaskCheck(builder, xferOp, iv)) {
+ if (cond) {
+ cond = builder.create<AndOp>(xferOp.getLoc(), cond, maskCond);
+ } else {
+ cond = maskCond;
}
- } else {
- VectorBoundsCapture vectorBoundsCapture(majorVectorType);
- auto majorLbs = vectorBoundsCapture.getLbs();
- auto majorUbs = vectorBoundsCapture.getUbs();
- auto majorSteps = vectorBoundsCapture.getSteps();
- affineLoopNestBuilder(
- majorLbs, majorUbs, majorSteps, [&](ValueRange majorIvs) {
- ValueRange indices(xferOp.indices());
- loopBodyBuilder(majorIvs, indices.take_front(leadingRank),
- indices.drop_front(leadingRank).take_front(majorRank),
- indices.take_back(minorRank), memrefBoundsCapture);
+ }
+
+ // If the condition is non-empty, generate an SCF::IfOp.
+ if (cond) {
+ auto check = builder.create<scf::IfOp>(
+ xferOp.getLoc(), resultTypes, cond,
+ /*thenBuilder=*/
+ [&](OpBuilder &builder, Location loc) {
+ maybeYieldValue(hasRetVal, builder, loc, inBoundsCase(builder, loc));
+ },
+ /*elseBuilder=*/
+ [&](OpBuilder &builder, Location loc) {
+ if (outOfBoundsCase) {
+ maybeYieldValue(hasRetVal, builder, loc,
+ outOfBoundsCase(builder, loc));
+ } else {
+ builder.create<scf::YieldOp>(loc);
+ }
});
+
+ return hasRetVal ? check.getResult(0) : Value();
}
+
+ // Condition is empty, no need for an SCF::IfOp.
+ return inBoundsCase(builder, xferOp.getLoc());
}
-static Optional<int64_t> extractConstantIndex(Value v) {
- if (auto cstOp = v.getDefiningOp<ConstantIndexOp>())
- return cstOp.getValue();
- if (auto affineApplyOp = v.getDefiningOp<AffineApplyOp>())
- if (affineApplyOp.getAffineMap().isSingleConstant())
- return affineApplyOp.getAffineMap().getSingleConstantResult();
- return None;
+/// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
+/// a return value. Consequently, this function does not have a return value.
+template <typename OpTy>
+static void generateInBoundsCheck(
+ OpTy xferOp, Value iv, OpBuilder &builder, Optional<int64_t> dim,
+ function_ref<void(OpBuilder &, Location)> inBoundsCase,
+ function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
+ generateInBoundsCheck(
+ xferOp, iv, builder, dim, /*resultTypes=*/TypeRange(),
+ /*inBoundsCase=*/
+ [&](OpBuilder &builder, Location loc) {
+ inBoundsCase(builder, loc);
+ return Value();
+ },
+ /*outOfBoundsCase=*/
+ [&](OpBuilder &builder, Location loc) {
+ if (outOfBoundsCase)
+ outOfBoundsCase(builder, loc);
+ return Value();
+ });
}
-// Missing foldings of scf.if make it necessary to perform poor man's folding
-// eagerly, especially in the case of unrolling. In the future, this should go
-// away once scf.if folds properly.
-static Value onTheFlyFoldSLT(Value v, Value ub) {
- using namespace mlir::edsc::op;
- auto maybeCstV = extractConstantIndex(v);
- auto maybeCstUb = extractConstantIndex(ub);
- if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb)
- return Value();
- return slt(v, ub);
+/// Given an ArrayAttr, return a copy where the first element is dropped.
+static ArrayAttr dropFirstElem(OpBuilder &builder, ArrayAttr attr) {
+ if (!attr)
+ return attr;
+ return ArrayAttr::get(builder.getContext(), attr.getValue().drop_front());
}
-/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in
-/// `majorIvsPlusOffsets`.
-/// 2. Return a value of i1 that determines whether the first
-/// `majorIvs.rank()`
-/// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`.
-static Value
-emitInBoundsCondition(PatternRewriter &rewriter,
- VectorTransferOpInterface xferOp, unsigned leadingRank,
- ValueRange majorIvs, ValueRange majorOffsets,
- const MemRefBoundsCapture &memrefBounds,
- SmallVectorImpl<Value> &majorIvsPlusOffsets) {
- Value inBoundsCondition;
- majorIvsPlusOffsets.reserve(majorIvs.size());
- unsigned idx = 0;
- SmallVector<Value, 4> bounds =
- applyMapToValues(rewriter, xferOp.getLoc(), xferOp.permutation_map(),
- memrefBounds.getUbs());
- for (auto it : llvm::zip(majorIvs, majorOffsets, bounds)) {
- Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it);
- using namespace mlir::edsc::op;
- majorIvsPlusOffsets.push_back(iv + off);
- auto affineConstExpr =
- xferOp.permutation_map().getResult(idx).dyn_cast<AffineConstantExpr>();
- bool isBroadcast = affineConstExpr && affineConstExpr.getValue() == 0;
- if (!xferOp.isDimInBounds(leadingRank + idx) && !isBroadcast) {
- Value inBoundsCond = onTheFlyFoldSLT(majorIvsPlusOffsets.back(), ub);
- if (inBoundsCond)
- inBoundsCondition = (inBoundsCondition)
- ? (inBoundsCondition && inBoundsCond)
- : inBoundsCond;
- }
- ++idx;
- }
- return inBoundsCondition;
+/// Add the pass label to a vector transfer op if its vector rank is greater
+/// than the target rank.
+template <typename OpTy>
+static void maybeApplyPassLabel(OpBuilder &builder, OpTy newXferOp) {
+ if (newXferOp.getVectorType().getRank() > kTargetRank)
+ newXferOp->setAttr(kPassLabel, builder.getUnitAttr());
}
-// TODO: Parallelism and threadlocal considerations.
-static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType,
- Operation *op) {
- auto &b = ScopedContext::getBuilderRef();
- OpBuilder::InsertionGuard guard(b);
- Operation *scope =
- op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
- assert(scope && "Expected op to be inside automatic allocation scope");
- b.setInsertionPointToStart(&scope->getRegion(0).front());
- Value res = memref_alloca(memRefMinorVectorType);
- return res;
+/// Given a transfer op, find the memref from which the mask is loaded. This
+/// is similar to Strategy<TransferWriteOp>::getBuffer.
+template <typename OpTy>
+static Value getMaskBuffer(OpTy xferOp) {
+ assert(xferOp.mask() && "Expected that transfer op has mask");
+ auto loadOp = xferOp.mask().template getDefiningOp<memref::LoadOp>();
+ assert(loadOp && "Expected transfer op mask produced by LoadOp");
+ return loadOp.getMemRef();
}
+/// Codegen strategy, depending on the operation.
+template <typename OpTy>
+struct Strategy;
+
+/// Codegen strategy for vector TransferReadOp.
template <>
-LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
- Value alloc, result;
- if (options.unroll)
- result = std_splat(vectorType, xferOp.padding());
- else
- alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
-
- emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
- ValueRange majorOffsets, ValueRange minorOffsets,
- const MemRefBoundsCapture &memrefBounds) {
- /// Lambda to load 1-D vector in the current loop ivs + offset context.
- auto load1DVector = [&](ValueRange majorIvsPlusOffsets) -> Value {
- SmallVector<Value, 8> indexing;
- indexing.reserve(leadingRank + majorRank + minorRank);
- indexing.append(leadingOffsets.begin(), leadingOffsets.end());
- indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end());
- indexing.append(minorOffsets.begin(), minorOffsets.end());
- Value memref = xferOp.source();
- auto map =
- getTransferMinorIdentityMap(xferOp.getShapedType(), minorVectorType);
- ArrayAttr inBounds;
- if (xferOp.isDimInBounds(xferOp.getVectorType().getRank() - 1)) {
- OpBuilder &b = ScopedContext::getBuilderRef();
- inBounds = b.getBoolArrayAttr({true});
- }
- return vector_transfer_read(minorVectorType, memref, indexing,
- AffineMapAttr::get(map), xferOp.padding(),
- inBounds);
- };
-
- // 1. Compute the inBoundsCondition in the current loops ivs + offset
- // context.
- SmallVector<Value, 4> majorIvsPlusOffsets;
- Value inBoundsCondition = emitInBoundsCondition(
- rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
- leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
-
- if (inBoundsCondition) {
- // 2. If the condition is not null, we need an IfOp, which may yield
- // if `options.unroll` is true.
- SmallVector<Type, 1> resultType;
- if (options.unroll)
- resultType.push_back(vectorType);
-
- // 3. If in-bounds, progressively lower to a 1-D transfer read, otherwise
- // splat a 1-D vector.
- ValueRange ifResults = conditionBuilder(
- resultType, inBoundsCondition,
- [&]() -> scf::ValueVector {
- Value vector = load1DVector(majorIvsPlusOffsets);
- // 3.a. If `options.unroll` is true, insert the 1-D vector in the
- // aggregate. We must yield and merge with the `else` branch.
- if (options.unroll) {
- vector = vector_insert(vector, result, majorIvs);
- return {vector};
- }
- // 3.b. Otherwise, just go through the temporary `alloc`.
- memref_store(vector, alloc, majorIvs);
- return {};
- },
- [&]() -> scf::ValueVector {
- Value vector = std_splat(minorVectorType, xferOp.padding());
- // 3.c. If `options.unroll` is true, insert the 1-D vector in the
- // aggregate. We must yield and merge with the `then` branch.
- if (options.unroll) {
- vector = vector_insert(vector, result, majorIvs);
- return {vector};
- }
- // 3.d. Otherwise, just go through the temporary `alloc`.
- memref_store(vector, alloc, majorIvs);
- return {};
- });
+struct Strategy<TransferReadOp> {
+ /// Find the StoreOp that is used for writing the current TransferReadOp's
+ /// result to the temporary buffer allocation.
+ static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
+ assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
+ auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
+ assert(storeOp && "Expected TransferReadOp result used by StoreOp");
+ return storeOp;
+ }
- if (!resultType.empty())
- result = *ifResults.begin();
- } else {
- // 4. Guaranteed in-bounds, progressively lower to a 1-D transfer read.
- Value loaded1D = load1DVector(majorIvsPlusOffsets);
- // 5.a. If `options.unroll` is true, insert the 1-D vector in the
- // aggregate.
- if (options.unroll)
- result = vector_insert(loaded1D, result, majorIvs);
- // 5.b. Otherwise, just go through the temporary `alloc`.
- else
- memref_store(loaded1D, alloc, majorIvs);
- }
- });
+ /// Find the temporary buffer allocation. All labeled TransferReadOps are
+ /// used like this, where %buf is either the buffer allocation or a type cast
+ /// of the buffer allocation:
+ /// ```
+ /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
+ /// memref.store %vec, %buf[...] ...
+ /// ```
+ static Value getBuffer(TransferReadOp xferOp) {
+ return getStoreOp(xferOp).getMemRef();
+ }
- assert((!options.unroll ^ (bool)result) &&
- "Expected resulting Value iff unroll");
- if (!result)
- result =
- memref_load(vector_type_cast(MemRefType::get({}, vectorType), alloc));
- rewriter.replaceOp(op, result);
+ /// Retrieve the indices of the current StoreOp that stores into the buffer.
+ static void getBufferIndices(TransferReadOp xferOp,
+ SmallVector<Value, 8> &indices) {
+ auto storeOp = getStoreOp(xferOp);
+ auto prevIndices = memref::StoreOpAdaptor(storeOp).indices();
+ indices.append(prevIndices.begin(), prevIndices.end());
+ }
- return success();
-}
+ /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
+ /// accesses on the to-be-unpacked dimension.
+ ///
+ /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
+ /// variable `iv`.
+ /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
+ ///
+ /// E.g.:
+ /// ```
+ /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
+ /// : memref<?x?x?xf32>, vector<4x3xf32>
+ /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
+ /// ```
+ /// Is rewritten to:
+ /// ```
+ /// %casted = vector.type_cast %buf
+ /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
+ /// for %j = 0 to 4 {
+ /// %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
+ /// : memref<?x?x?xf32>, vector<3xf32>
+ /// memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
+ /// }
+ /// ```
+ ///
+ /// Note: The loop and type cast are generated in TransferOpConversion.
+ /// The original TransferReadOp and store op are deleted in `cleanup`.
+ /// Note: The `mask` operand is set in TransferOpConversion.
+ static TransferReadOp rewriteOp(OpBuilder &builder, TransferReadOp xferOp,
+ Value buffer, Value iv) {
+ SmallVector<Value, 8> storeIndices;
+ getBufferIndices(xferOp, storeIndices);
+ storeIndices.push_back(iv);
+
+ SmallVector<Value, 8> xferIndices;
+ getXferIndices(xferOp, iv, xferIndices);
+
+ auto bufferType = buffer.getType().dyn_cast<ShapedType>();
+ auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
+ auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
+ auto newXfer =
+ vector_transfer_read(
+ vecType, xferOp.source(), xferIndices,
+ AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)),
+ xferOp.padding(), Value(), inBoundsAttr)
+ .value;
+
+ maybeApplyPassLabel(builder,
+ dyn_cast<TransferReadOp>(newXfer.getDefiningOp()));
+
+ memref_store(newXfer, buffer, storeIndices);
+ return newXfer.getDefiningOp<TransferReadOp>();
+ }
+
+ /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
+ /// padding value to the temporary buffer.
+ static void handleOutOfBoundsDim(OpBuilder & /*builder*/,
+ TransferReadOp xferOp, Value buffer,
+ Value iv) {
+ SmallVector<Value, 8> storeIndices;
+ getBufferIndices(xferOp, storeIndices);
+ storeIndices.push_back(iv);
+
+ auto bufferType = buffer.getType().dyn_cast<ShapedType>();
+ auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
+ auto vec = std_splat(vecType, xferOp.padding());
+ memref_store(vec, buffer, storeIndices);
+ }
+
+ /// Cleanup after rewriting the op.
+ static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp) {
+ rewriter.eraseOp(getStoreOp(xferOp));
+ rewriter.eraseOp(xferOp);
+ }
+};
+/// Codegen strategy for vector TransferWriteOp.
template <>
-LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
- Value alloc;
- if (!options.unroll) {
- alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
- memref_store(xferOp.vector(),
- vector_type_cast(MemRefType::get({}, vectorType), alloc));
- }
-
- emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
- ValueRange majorOffsets, ValueRange minorOffsets,
- const MemRefBoundsCapture &memrefBounds) {
- // Lower to 1-D vector_transfer_write and let recursion handle it.
- auto emitTransferWrite = [&](ValueRange majorIvsPlusOffsets) {
- SmallVector<Value, 8> indexing;
- indexing.reserve(leadingRank + majorRank + minorRank);
- indexing.append(leadingOffsets.begin(), leadingOffsets.end());
- indexing.append(majorIvsPlusOffsets.begin(), majorIvsPlusOffsets.end());
- indexing.append(minorOffsets.begin(), minorOffsets.end());
- Value result;
- // If `options.unroll` is true, extract the 1-D vector from the
- // aggregate.
- if (options.unroll)
- result = vector_extract(xferOp.vector(), majorIvs);
- else
- result = memref_load(alloc, majorIvs);
- auto map =
- getTransferMinorIdentityMap(xferOp.getShapedType(), minorVectorType);
- ArrayAttr inBounds;
- if (xferOp.isDimInBounds(xferOp.getVectorType().getRank() - 1)) {
- OpBuilder &b = ScopedContext::getBuilderRef();
- inBounds = b.getBoolArrayAttr({true});
- }
- vector_transfer_write(result, xferOp.source(), indexing,
- AffineMapAttr::get(map), inBounds);
- };
-
- // 1. Compute the inBoundsCondition in the current loops ivs + offset
- // context.
- SmallVector<Value, 4> majorIvsPlusOffsets;
- Value inBoundsCondition = emitInBoundsCondition(
- rewriter, cast<VectorTransferOpInterface>(xferOp.getOperation()),
- leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets);
-
- if (inBoundsCondition) {
- // 2.a. If the condition is not null, we need an IfOp, to write
- // conditionally. Progressively lower to a 1-D transfer write.
- conditionBuilder(inBoundsCondition,
- [&] { emitTransferWrite(majorIvsPlusOffsets); });
- } else {
- // 2.b. Guaranteed in-bounds. Progressively lower to a 1-D transfer write.
- emitTransferWrite(majorIvsPlusOffsets);
- }
- });
+struct Strategy<TransferWriteOp> {
+ /// Find the temporary buffer allocation. All labeled TransferWriteOps are
+ /// used like this, where %buf is either the buffer allocation or a type cast
+ /// of the buffer allocation:
+ /// ```
+ /// %vec = memref.load %buf[...] ...
+ /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
+ /// ```
+ static Value getBuffer(TransferWriteOp xferOp) {
+ auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
+ assert(loadOp && "Expected transfer op vector produced by LoadOp");
+ return loadOp.getMemRef();
+ }
+
+ /// Retrieve the indices of the current LoadOp that loads from the buffer.
+ static void getBufferIndices(TransferWriteOp xferOp,
+ SmallVector<Value, 8> &indices) {
+ auto loadOp = xferOp.vector().getDefiningOp<memref::LoadOp>();
+ auto prevIndices = memref::LoadOpAdaptor(loadOp).indices();
+ indices.append(prevIndices.begin(), prevIndices.end());
+ }
+
+ /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
+ /// accesses on the to-be-unpacked dimension.
+ ///
+ /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
+ /// using the loop iteration variable `iv`.
+ /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
+ /// to memory.
+ ///
+ /// Note: For more details, see comments on Strategy<TransferReadOp>.
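+  ///
+  /// E.g., mirroring the TransferReadOp example above (sketch; names and
+  /// types are illustrative only), the body of the generated loop resembles:
+  /// ```
+  /// %vec = memref.load %casted[%i, %j] : memref<5x4xvector<3xf32>>
+  /// vector.transfer_write %vec, %A[%a + %i, %b + %j, %c]
+  ///     : vector<3xf32>, memref<?x?x?xf32>
+  /// ```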
+ static TransferWriteOp rewriteOp(OpBuilder &builder, TransferWriteOp xferOp,
+ Value buffer, Value iv) {
+ SmallVector<Value, 8> loadIndices;
+ getBufferIndices(xferOp, loadIndices);
+ loadIndices.push_back(iv);
+
+ SmallVector<Value, 8> xferIndices;
+ getXferIndices(xferOp, iv, xferIndices);
- rewriter.eraseOp(op);
+ auto vec = memref_load(buffer, loadIndices);
+ auto inBoundsAttr = dropFirstElem(builder, xferOp.in_boundsAttr());
+ auto newXfer = vector_transfer_write(
+ Type(), vec, xferOp.source(), xferIndices,
+ AffineMapAttr::get(unpackedPermutationMap(xferOp, builder)), Value(),
+ inBoundsAttr);
+ maybeApplyPassLabel(builder, newXfer.op);
+
+ return newXfer;
+ }
+
+ /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
+ static void handleOutOfBoundsDim(OpBuilder &builder, TransferWriteOp xferOp,
+ Value buffer, Value iv) {}
+
+ /// Cleanup after rewriting the op.
+ static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp) {
+ rewriter.eraseOp(xferOp);
+ }
+};
+
+template <typename OpTy>
+LogicalResult checkPrepareXferOp(OpTy xferOp) {
+ if (xferOp->hasAttr(kPassLabel))
+ return failure();
+ if (xferOp.getVectorType().getRank() <= kTargetRank)
+ return failure();
return success();
}
-} // namespace
+/// Prepare a TransferReadOp for progressive lowering.
+///
+/// 1. Allocate a temporary buffer.
+/// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
+/// 3. Store the result of the TransferReadOp into the temporary buffer.
+/// 4. Load the result from the temporary buffer and replace all uses of the
+/// original TransferReadOp with this load.
+///
+/// E.g.:
+/// ```
+/// %vec = vector.transfer_read %A[%a, %b, %c], %cst
+///     : memref<?x?x?xf32>, vector<5x4xf32>
+/// ```
+/// is rewritten to:
+/// ```
+/// %0 = memref.alloca() : memref<vector<5x4xf32>>
+/// %1 = vector.transfer_read %A[%a, %b, %c], %cst
+///     { __vector_to_scf_lowering__ } : memref<?x?x?xf32>, vector<5x4xf32>
+/// memref.store %1, %0[] : memref<vector<5x4xf32>>
+/// %vec = memref.load %0[] : memref<vector<5x4xf32>>
+/// ```
+///
+/// Note: A second temporary buffer may be allocated for the `mask` operand.
+struct PrepareTransferReadConversion : public OpRewritePattern<TransferReadOp> {
+ using OpRewritePattern<TransferReadOp>::OpRewritePattern;
-/// Analyzes the `transfer` to find an access dimension along the fastest remote
-/// MemRef dimension. If such a dimension with coalescing properties is found,
-/// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of
-/// LoopNestBuilder captures it in the innermost loop.
-template <typename TransferOpTy>
-static int computeCoalescedIndex(TransferOpTy transfer) {
- // rank of the remote memory access, coalescing behavior occurs on the
- // innermost memory dimension.
- auto remoteRank = transfer.getShapedType().getRank();
- // Iterate over the results expressions of the permutation map to determine
- // the loop order for creating pointwise copies between remote and local
- // memories.
- int coalescedIdx = -1;
- auto exprs = transfer.permutation_map().getResults();
- for (auto en : llvm::enumerate(exprs)) {
- auto dim = en.value().template dyn_cast<AffineDimExpr>();
- if (!dim) {
- continue;
+ LogicalResult matchAndRewrite(TransferReadOp xferOp,
+ PatternRewriter &rewriter) const override {
+ if (checkPrepareXferOp(xferOp).failed())
+ return failure();
+
+ ScopedContext scope(rewriter, xferOp.getLoc());
+ auto buffers = allocBuffers(xferOp);
+ auto *newXfer = rewriter.clone(*xferOp.getOperation());
+ newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
+ if (xferOp.mask()) {
+ dyn_cast<TransferReadOp>(newXfer).maskMutable().assign(
+ buffers.maskBuffer);
}
- auto memRefDim = dim.getPosition();
- if (memRefDim == remoteRank - 1) {
- // memRefDim has coalescing properties, it should be swapped in the last
- // position.
- assert(coalescedIdx == -1 && "Unexpected > 1 coalesced indices");
- coalescedIdx = en.index();
+
+ memref_store(newXfer->getResult(0), buffers.dataBuffer);
+ rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
+
+ return success();
+ }
+};
+
+/// Prepare a TransferWriteOp for progressive lowering.
+///
+/// 1. Allocate a temporary buffer.
+/// 2. Store the vector into the buffer.
+/// 3. Load the vector from the buffer again.
+/// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
+/// marking it eligible for progressive lowering via TransferOpConversion.
+///
+/// E.g.:
+/// ```
+/// vector.transfer_write %vec, %A[%a, %b, %c]
+/// : vector<5x4xf32>, memref<?x?x?xf32>
+/// ```
+/// is rewritten to:
+/// ```
+/// %0 = memref.alloca() : memref<vector<5x4xf32>>
+/// memref.store %vec, %0[] : memref<vector<5x4xf32>>
+/// %1 = memref.load %0[] : memref<vector<5x4xf32>>
+/// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
+/// : vector<5x4xf32>, memref<?x?x?xf32>
+/// ```
+///
+/// Note: A second temporary buffer may be allocated for the `mask` operand.
+struct PrepareTransferWriteConversion
+ : public OpRewritePattern<TransferWriteOp> {
+ using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(TransferWriteOp xferOp,
+ PatternRewriter &rewriter) const override {
+ if (checkPrepareXferOp(xferOp).failed())
+ return failure();
+
+ ScopedContext scope(rewriter, xferOp.getLoc());
+ auto buffers = allocBuffers(xferOp);
+ memref_store(xferOp.vector(), buffers.dataBuffer);
+ auto loadedVec = memref_load(buffers.dataBuffer);
+ rewriter.updateRootInPlace(xferOp, [&]() {
+ xferOp.vectorMutable().assign(loadedVec);
+ xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
+ });
+
+ if (xferOp.mask()) {
+ rewriter.updateRootInPlace(
+ xferOp, [&]() { xferOp.maskMutable().assign(buffers.maskBuffer); });
}
+
+ return success();
}
- return coalescedIdx;
-}
+};
-template <typename TransferOpTy>
-VectorTransferRewriter<TransferOpTy>::VectorTransferRewriter(
- VectorTransferToSCFOptions options, MLIRContext *context)
- : RewritePattern(TransferOpTy::getOperationName(), 1, context),
- options(options) {}
-
-/// Used for staging the transfer in a local buffer.
-template <typename TransferOpTy>
-MemRefType VectorTransferRewriter<TransferOpTy>::tmpMemRefType(
- TransferOpTy transfer) const {
- auto vectorType = transfer.getVectorType();
- return MemRefType::get(vectorType.getShape().drop_back(),
- VectorType::get(vectorType.getShape().take_back(),
- vectorType.getElementType()),
- {}, 0);
-}
+/// Progressive lowering of vector transfer ops: Unpack one dimension.
+///
+/// 1. Unpack one dimension from the current buffer type and cast the buffer
+/// to that new type. E.g.:
+/// ```
+/// %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
+/// vector.transfer_write %vec ...
+/// ```
+/// The following cast is generated:
+/// ```
+/// %casted = vector.type_cast %0
+/// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
+/// ```
+/// 2. Generate a for loop and rewrite the transfer op according to the
+/// corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
+/// out-of-bounds, generate an if-check and handle both cases separately.
+/// 3. Clean up according to the corresponding Strategy<OpTy>.
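+///
+/// The loop generated in step 2 resembles the following sketch (bounds and
+/// body are illustrative only; the actual body is produced by Strategy<OpTy>
+/// and generateInBoundsCheck):
+/// ```
+/// scf.for %iv = %c0 to %ub step %c1 {
+///   (in-bounds check, then rewritten (N-1)-d transfer op)
+/// }
+/// ```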
+template <typename OpTy>
+struct TransferOpConversion : public OpRewritePattern<OpTy> {
+ using OpRewritePattern<OpTy>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(OpTy xferOp,
+ PatternRewriter &rewriter) const override {
+ if (!xferOp->hasAttr(kPassLabel))
+ return failure();
+
+ ScopedContext scope(rewriter, xferOp.getLoc());
+
+ // Find and cast data buffer. How the buffer can be found depends on OpTy.
+ auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
+ auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
+ auto castedDataType = unpackOneDim(dataBufferType);
+ auto castedDataBuffer = vector_type_cast(castedDataType, dataBuffer);
-static void emitWithBoundsChecks(
- PatternRewriter &rewriter, VectorTransferOpInterface transfer,
- ValueRange ivs, const MemRefBoundsCapture &memRefBoundsCapture,
- function_ref<void(ArrayRef<Value>)> inBoundsFun,
- function_ref<void(ArrayRef<Value>)> outOfBoundsFun = nullptr) {
- // Permute the incoming indices according to the permutation map.
- SmallVector<Value, 4> indices =
- applyMapToValues(rewriter, transfer.getLoc(), transfer.permutation_map(),
- transfer.indices());
-
- // Generate a bounds check if necessary.
- SmallVector<Value, 4> majorIvsPlusOffsets;
- Value inBoundsCondition =
- emitInBoundsCondition(rewriter, transfer, 0, ivs, indices,
- memRefBoundsCapture, majorIvsPlusOffsets);
-
- // Apply the permutation map to the ivs. The permutation map may not use all
- // the inputs.
- SmallVector<Value, 4> scalarAccessExprs(transfer.indices().size());
- for (unsigned memRefDim = 0; memRefDim < transfer.indices().size();
- ++memRefDim) {
- // Linear search on a small number of entries.
- int loopIndex = -1;
- auto exprs = transfer.permutation_map().getResults();
- for (auto en : llvm::enumerate(exprs)) {
- auto expr = en.value();
- auto dim = expr.dyn_cast<AffineDimExpr>();
- // Sanity check.
- assert((dim || expr.cast<AffineConstantExpr>().getValue() == 0) &&
- "Expected dim or 0 in permutationMap");
- if (dim && memRefDim == dim.getPosition()) {
- loopIndex = en.index();
- break;
+ // If the xferOp has a mask: Find and cast mask buffer.
+ Value castedMaskBuffer;
+ if (xferOp.mask()) {
+ auto maskBuffer = getMaskBuffer(xferOp);
+ auto maskBufferType =
+ maskBuffer.getType().template dyn_cast<MemRefType>();
+ if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
+ // Do not unpack a dimension of the mask, if:
+ // * To-be-unpacked transfer op dimension is a broadcast.
+ // * Mask is 1D, i.e., the mask cannot be further unpacked.
+ // (That means that all remaining dimensions of the transfer op must
+ // be broadcasted.)
+ castedMaskBuffer = maskBuffer;
+ } else {
+ auto castedMaskType = unpackOneDim(maskBufferType);
+ castedMaskBuffer = vector_type_cast(castedMaskType, maskBuffer);
}
}
- using namespace edsc::op;
- auto i = transfer.indices()[memRefDim];
- scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex];
- }
-
- if (inBoundsCondition)
- conditionBuilder(
- /* scf.if */ inBoundsCondition, // {
- [&] { inBoundsFun(scalarAccessExprs); },
- // } else {
- outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); }
- : function_ref<void()>()
- // }
- );
- else
- inBoundsFun(scalarAccessExprs);
+ // Loop bounds and step.
+ auto lb = std_constant_index(0).value;
+ auto ub = std_constant_index(
+ castedDataType.getDimSize(castedDataType.getRank() - 1))
+ .value;
+ auto step = std_constant_index(1).value;
+
+ // Generate for loop.
+ rewriter.create<scf::ForOp>(
+ xferOp.getLoc(), lb, ub, step, ValueRange(),
+ [&](OpBuilder &b, Location loc, Value iv, ValueRange /*loopState*/) {
+ ScopedContext scope(b, loc);
+ generateInBoundsCheck(
+ xferOp, iv, b, unpackedDim(xferOp),
+ /*inBoundsCase=*/
+ [&](OpBuilder &b, Location /*loc*/) {
+ // Create new transfer op.
+ OpTy newXfer =
+ Strategy<OpTy>::rewriteOp(b, xferOp, castedDataBuffer, iv);
+
+ // If old transfer op has a mask: Set mask on new transfer op.
+                // Special case: If the mask of the old transfer op is 1D and
+                // the unpacked dim is not a broadcast, no mask is needed on
+                // the new transfer op.
+ if (xferOp.mask() && (xferOp.isBroadcastDim(0) ||
+ xferOp.getMaskType().getRank() > 1)) {
+ OpBuilder::InsertionGuard guard(b);
+ b.setInsertionPoint(newXfer); // Insert load before newXfer.
+
+ SmallVector<Value, 8> loadIndices;
+ Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
+ // In case of broadcast: Use same indices to load from memref
+ // as before.
+ if (!xferOp.isBroadcastDim(0))
+ loadIndices.push_back(iv);
+
+ auto mask = memref_load(castedMaskBuffer, loadIndices);
+ rewriter.updateRootInPlace(
+ newXfer, [&]() { newXfer.maskMutable().assign(mask); });
+ }
+ },
+ /*outOfBoundsCase=*/
+ [&](OpBuilder &b, Location /*loc*/) {
+ Strategy<OpTy>::handleOutOfBoundsDim(b, xferOp,
+ castedDataBuffer, iv);
+ });
+ b.create<scf::YieldOp>(loc);
+ });
+
+ Strategy<OpTy>::cleanup(rewriter, xferOp);
+ return success();
+ }
+};
+
+/// If the original transfer op has a mask, compute the mask of the new transfer
+/// op (for the current iteration `i`) and assign it.
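+/// E.g. (sketch; mask type is illustrative only), for i = 2 and a 2-D mask,
+/// the unpacked mask is taken as:
+/// ```
+/// %new_mask = vector.extract %mask[2] : vector<5x4xi1>
+/// ```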
+template <typename OpTy>
+static void maybeAssignMask(OpBuilder &builder, OpTy xferOp, OpTy newXferOp,
+ int64_t i) {
+ if (!xferOp.mask())
+ return;
+
+ if (xferOp.isBroadcastDim(0)) {
+ // To-be-unpacked dimension is a broadcast, which does not have a
+ // corresponding mask dimension. Mask attribute remains unchanged.
+ newXferOp.maskMutable().assign(xferOp.mask());
+ return;
+ }
+
+ if (xferOp.getMaskType().getRank() > 1) {
+ // Unpack one dimension of the mask.
+ OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPoint(newXferOp); // Insert load before newXfer.
+
+ llvm::SmallVector<int64_t, 1> indices({i});
+ auto newMask = vector_extract(xferOp.mask(), indices).value;
+ newXferOp.maskMutable().assign(newMask);
+ }
+
+ // If we end up here: The mask of the old transfer op is 1D and the unpacked
+ // dim is not a broadcast, so no mask is needed on the new transfer op.
+ // `generateInBoundsCheck` will have evaluated the mask already.
}
-namespace mlir {
+/// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
+/// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
+/// memref buffer is allocated and the SCF loop is fully unrolled.
+///
+/// E.g.:
+/// ```
+/// %vec = vector.transfer_read %A[%a, %b, %c], %padding
+/// : memref<?x?x?xf32>, vector<5x4xf32>
+/// ```
+/// is rewritten to IR such as (simplified):
+/// ```
+/// %v_init = splat %padding : vector<5x4xf32>
+/// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
+/// : memref<?x?x?xf32>, vector<4xf32>
+/// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
+/// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
+/// : memref<?x?x?xf32>, vector<4xf32>
+/// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
+/// ...
+/// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
+/// : memref<?x?x?xf32>, vector<4xf32>
+/// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
+/// ```
+///
+/// Note: As an optimization, if the result of the original TransferReadOp
+/// was directly inserted into another vector, no new %v_init vector is created.
+/// Instead, the new TransferReadOp results are inserted into that vector.
+struct UnrollTransferReadConversion : public OpRewritePattern<TransferReadOp> {
+ using OpRewritePattern<TransferReadOp>::OpRewritePattern;
+
+ /// Return the vector into which the newly created TransferReadOp results
+ /// are inserted.
+ Value getResultVector(TransferReadOp xferOp,
+ PatternRewriter &rewriter) const {
+ if (auto insertOp = getInsertOp(xferOp))
+ return insertOp.dest();
+ return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
+ }
+
+ /// If the result of the TransferReadOp has exactly one user, which is a
+ /// vector::InsertOp, return that operation.
+ vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
+ if (xferOp->hasOneUse()) {
+ Operation *xferOpUser = *xferOp->getUsers().begin();
+ if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
+ return insertOp;
+ }
+
+ return vector::InsertOp();
+ }
+
+ /// If the result of the TransferReadOp has exactly one user, which is a
+ /// vector::InsertOp, return that operation's indices.
+ void getInsertionIndices(TransferReadOp xferOp,
+ SmallVector<int64_t, 8> &indices) const {
+ if (auto insertOp = getInsertOp(xferOp)) {
+ llvm::for_each(insertOp.position(), [&](Attribute attr) {
+ indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
+ });
+ }
+ }
+
+ /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
+ /// accesses, and broadcasts and transposes in permutation maps.
+ LogicalResult matchAndRewrite(TransferReadOp xferOp,
+ PatternRewriter &rewriter) const override {
+ if (xferOp.getVectorType().getRank() <= kTargetRank)
+ return failure();
+
+ ScopedContext scope(rewriter, xferOp.getLoc());
+ auto insertOp = getInsertOp(xferOp);
+ auto vec = getResultVector(xferOp, rewriter);
+ auto vecType = vec.getType().dyn_cast<VectorType>();
+ auto xferVecType = xferOp.getVectorType();
+ auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
+ xferVecType.getElementType());
+ int64_t dimSize = xferVecType.getShape()[0];
+
+ // Generate fully unrolled loop of transfer ops.
+ for (int64_t i = 0; i < dimSize; ++i) {
+ Value iv = std_constant_index(i);
+
+ vec = generateInBoundsCheck(
+ xferOp, iv, rewriter, unpackedDim(xferOp), TypeRange(vecType),
+ /*inBoundsCase=*/
+ [&](OpBuilder &b, Location loc) {
+ ScopedContext scope(b, loc);
+
+ // Indices for the new transfer op.
+ SmallVector<Value, 8> xferIndices;
+ getXferIndices(xferOp, iv, xferIndices);
+
+ // Indices for the new vector.insert op.
+ SmallVector<int64_t, 8> insertionIndices;
+ getInsertionIndices(xferOp, insertionIndices);
+ insertionIndices.push_back(i);
-/// Lowers TransferReadOp into a combination of:
-/// 1. local memory allocation;
-/// 2. perfect loop nest over:
-/// a. scalar load from local buffers (viewed as a scalar memref);
-/// a. scalar store to original memref (with padding).
-/// 3. vector_load from local buffer (viewed as a memref<1 x vector>);
-/// 4. local memory deallocation.
+ auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
+ auto newXferOpVal =
+ vector_transfer_read(
+ newXferVecType, xferOp.source(), xferIndices,
+ AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
+ xferOp.padding(), Value(), inBoundsAttr)
+ .value;
+ auto newXferOp =
+ dyn_cast<TransferReadOp>(newXferOpVal.getDefiningOp());
+
+ maybeAssignMask(b, xferOp, newXferOp, i);
+
+ return vector_insert(newXferOp, vec, insertionIndices).value;
+ },
+ /*outOfBoundsCase=*/
+ [&](OpBuilder &b, Location loc) {
+ // Loop through original (unmodified) vector.
+ return vec;
+ });
+ }
+
+ if (insertOp) {
+ // Rewrite single user of the old TransferReadOp, which was an InsertOp.
+ rewriter.replaceOp(insertOp, vec);
+ rewriter.eraseOp(xferOp);
+ } else {
+ rewriter.replaceOp(xferOp, vec);
+ }
+
+ return success();
+ }
+};
+
+/// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
+/// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
+/// memref buffer is allocated and the SCF loop is fully unrolled.
+///
+/// E.g.:
+/// ```
+/// vector.transfer_write %vec, %A[%a, %b, %c]
+/// : vector<5x4xf32>, memref<?x?x?xf32>
+/// ```
+/// is rewritten to IR such as (simplified):
+/// ```
+/// %v0 = vector.extract %vec[0] : vector<5x4xf32>
+/// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
+/// %v1 = vector.extract %vec[1] : vector<5x4xf32>
+/// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
+/// ...
+/// %v4 = vector.extract %vec[4] : vector<5x4xf32>
+/// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
+/// ```
///
-/// Lowers the data transfer part of a TransferReadOp while ensuring no
-/// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by
-/// padding.
+/// Note: As an optimization, if the vector of the original TransferWriteOp
+/// was directly extracted from another vector via an ExtractOp `a`, extract
+/// the vectors for the newly generated TransferWriteOps from `a`'s input. By
+/// doing so, `a` may become dead, and the number of ExtractOps generated during
+/// recursive application of this pattern will be minimal.
+struct UnrollTransferWriteConversion
+ : public OpRewritePattern<TransferWriteOp> {
+ using OpRewritePattern<TransferWriteOp>::OpRewritePattern;
-/// Performs the rewrite.
-template <>
-LogicalResult VectorTransferRewriter<TransferReadOp>::matchAndRewrite(
- Operation *op, PatternRewriter &rewriter) const {
- using namespace mlir::edsc::op;
+  /// Return the vector from which newly generated ExtractOps will extract.
+ Value getDataVector(TransferWriteOp xferOp) const {
+ if (auto extractOp = getExtractOp(xferOp))
+ return extractOp.vector();
+ return xferOp.vector();
+ }
- TransferReadOp transfer = cast<TransferReadOp>(op);
- if (transfer.mask())
- return failure();
- auto memRefType = transfer.getShapedType().dyn_cast<MemRefType>();
- if (!memRefType)
- return failure();
- // Fall back to a loop if the fastest varying stride is not 1 or it is
- // permuted.
- int64_t offset;
- SmallVector<int64_t, 4> strides;
- auto successStrides = getStridesAndOffset(memRefType, strides, offset);
- if (succeeded(successStrides) && strides.back() == 1 &&
- transfer.permutation_map().isMinorIdentity()) {
- // If > 1D, emit a bunch of loops around 1-D vector transfers.
- if (transfer.getVectorType().getRank() > 1)
- return NDTransferOpHelper<TransferReadOp>(rewriter, transfer, options)
- .doReplace();
- // If 1-D this is now handled by the target-specific lowering.
- if (transfer.getVectorType().getRank() == 1)
+ /// If the input of the given TransferWriteOp is an ExtractOp, return it.
+ vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
+ if (auto *op = xferOp.vector().getDefiningOp())
+ return dyn_cast<vector::ExtractOp>(op);
+ return vector::ExtractOp();
+ }
+
+ /// If the input of the given TransferWriteOp is an ExtractOp, return its
+ /// indices.
+ void getExtractionIndices(TransferWriteOp xferOp,
+ SmallVector<int64_t, 8> &indices) const {
+ if (auto extractOp = getExtractOp(xferOp)) {
+ llvm::for_each(extractOp.position(), [&](Attribute attr) {
+ indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
+ });
+ }
+ }
+
+ /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
+ /// accesses, and broadcasts and transposes in permutation maps.
+ LogicalResult matchAndRewrite(TransferWriteOp xferOp,
+ PatternRewriter &rewriter) const override {
+ if (xferOp.getVectorType().getRank() <= kTargetRank)
return failure();
+
+ ScopedContext scope(rewriter, xferOp.getLoc());
+ auto vec = getDataVector(xferOp);
+ auto xferVecType = xferOp.getVectorType();
+ int64_t dimSize = xferVecType.getShape()[0];
+
+ // Generate fully unrolled loop of transfer ops.
+ for (int64_t i = 0; i < dimSize; ++i) {
+ Value iv = std_constant_index(i);
+
+ generateInBoundsCheck(
+ xferOp, iv, rewriter, unpackedDim(xferOp),
+ /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
+ ScopedContext scope(b, loc);
+
+ // Indices for the new transfer op.
+ SmallVector<Value, 8> xferIndices;
+ getXferIndices(xferOp, iv, xferIndices);
+
+ // Indices for the new vector.extract op.
+ SmallVector<int64_t, 8> extractionIndices;
+ getExtractionIndices(xferOp, extractionIndices);
+ extractionIndices.push_back(i);
+
+ auto extracted = vector_extract(vec, extractionIndices).value;
+ auto inBoundsAttr = dropFirstElem(b, xferOp.in_boundsAttr());
+
+ auto newXferOp =
+ vector_transfer_write(
+ Type(), extracted, xferOp.source(), xferIndices,
+ AffineMapAttr::get(unpackedPermutationMap(xferOp, b)),
+ Value(), inBoundsAttr)
+ .op;
+
+ maybeAssignMask(b, xferOp, newXferOp, i);
+ });
+ }
+
+ rewriter.eraseOp(xferOp);
+ return success();
}
+};
- // Conservative lowering to scalar load / stores.
- // 1. Setup all the captures.
- ScopedContext scope(rewriter, transfer.getLoc());
- MemRefIndexedValue remote(transfer.source());
- MemRefBoundsCapture memRefBoundsCapture(transfer.source());
- VectorBoundsCapture vectorBoundsCapture(transfer.vector());
- int coalescedIdx = computeCoalescedIndex(transfer);
- // Swap the vectorBoundsCapture which will reorder loop bounds.
- if (coalescedIdx >= 0)
- vectorBoundsCapture.swapRanges(vectorBoundsCapture.rank() - 1,
- coalescedIdx);
-
- auto lbs = vectorBoundsCapture.getLbs();
- auto ubs = vectorBoundsCapture.getUbs();
- SmallVector<Value, 8> steps;
- steps.reserve(vectorBoundsCapture.getSteps().size());
- for (auto step : vectorBoundsCapture.getSteps())
- steps.push_back(std_constant_index(step));
-
- // 2. Emit alloc-copy-load-dealloc.
- MLIRContext *ctx = op->getContext();
- Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer);
- MemRefIndexedValue local(tmp);
- loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
- auto ivsStorage = llvm::to_vector<8>(loopIvs);
- // Swap the ivs which will reorder memory accesses.
- if (coalescedIdx >= 0)
- std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
-
- ArrayRef<Value> ivs(ivsStorage);
- Value pos = std_index_cast(IntegerType::get(ctx, 32), ivs.back());
- Value inVector = local(ivs.drop_back());
- auto loadValue = [&](ArrayRef<Value> indices) {
- Value vector = vector_insert_element(remote(indices), inVector, pos);
- local(ivs.drop_back()) = vector;
- };
- auto loadPadding = [&](ArrayRef<Value>) {
- Value vector = vector_insert_element(transfer.padding(), inVector, pos);
- local(ivs.drop_back()) = vector;
- };
- emitWithBoundsChecks(
- rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
- memRefBoundsCapture, loadValue, loadPadding);
- });
- Value vectorValue = memref_load(vector_type_cast(tmp));
-
- // 3. Propagate.
- rewriter.replaceOp(op, vectorValue);
- return success();
+/// Compute the indices into the memref for the LoadOp/StoreOp generated as
+/// part of TransferOp1dConversion. Return the memref dimension on which
+/// the transfer is operating. A return value of None indicates a broadcast.
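+/// E.g. (illustrative only), for a transfer at %A[%a, %b]:
+/// ```
+/// permutation_map = (d0, d1) -> (d0)  =>  indices [%a + iv, %b], returns 0
+/// permutation_map = (d0, d1) -> (0)   =>  indices [%a, %b],      returns None
+/// ```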
+template <typename OpTy>
+static Optional<int64_t>
+get1dMemrefIndices(OpTy xferOp, Value iv,
+ SmallVector<Value, 8> &memrefIndices) {
+ auto indices = xferOp.indices();
+ auto map = xferOp.permutation_map();
+
+ memrefIndices.append(indices.begin(), indices.end());
+ assert(map.getNumResults() == 1 &&
+ "Expected 1 permutation map result for 1D transfer");
+ if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
+ auto dim = expr.getPosition();
+ using edsc::op::operator+;
+ memrefIndices[dim] = memrefIndices[dim] + iv;
+ return dim;
+ }
+
+ assert(xferOp.isBroadcastDim(0) &&
+ "Expected AffineDimExpr or AffineConstantExpr");
+ return None;
}
-/// Lowers TransferWriteOp into a combination of:
-/// 1. local memory allocation;
-/// 2. vector_store to local buffer (viewed as a memref<1 x vector>);
-/// 3. perfect loop nest over:
-/// a. scalar load from local buffers (viewed as a scalar memref);
-/// a. scalar store to original memref (if in bounds).
-/// 4. local memory deallocation.
-///
-/// More specifically, lowers the data transfer part while ensuring no
-/// out-of-bounds accesses are possible.
+/// Codegen strategy for TransferOp1dConversion, depending on the
+/// operation.
+template <typename OpTy>
+struct Strategy1d;
+
+/// Codegen strategy for TransferReadOp.
template <>
-LogicalResult VectorTransferRewriter<TransferWriteOp>::matchAndRewrite(
- Operation *op, PatternRewriter &rewriter) const {
- using namespace edsc::op;
+struct Strategy1d<TransferReadOp> {
+ static void generateForLoopBody(OpBuilder &builder, Location loc,
+ TransferReadOp xferOp, Value iv,
+ ValueRange loopState) {
+ SmallVector<Value, 8> indices;
+ auto dim = get1dMemrefIndices(xferOp, iv, indices);
+ auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+ auto vec = loopState[0];
- TransferWriteOp transfer = cast<TransferWriteOp>(op);
- if (transfer.mask())
- return failure();
- auto memRefType = transfer.getShapedType().template dyn_cast<MemRefType>();
- if (!memRefType)
- return failure();
+ // In case of out-of-bounds access, leave `vec` as is (was initialized with
+ // padding value).
+ auto nextVec = generateInBoundsCheck(
+ xferOp, iv, builder, dim, TypeRange(xferOp.getVectorType()),
+ /*inBoundsCase=*/
+ [&](OpBuilder & /*b*/, Location loc) {
+ auto val = memref_load(xferOp.source(), indices);
+ return vector_insert_element(val, vec, ivI32.value).value;
+ },
+ /*outOfBoundsCase=*/
+ [&](OpBuilder & /*b*/, Location loc) { return vec; });
+ builder.create<scf::YieldOp>(loc, nextVec);
+ }
+
+ static Value initialLoopState(TransferReadOp xferOp) {
+    // Initialize vector with padding value.
+ return std_splat(xferOp.getVectorType(), xferOp.padding()).value;
+ }
+};
+
+/// Codegen strategy for TransferWriteOp.
+template <>
+struct Strategy1d<TransferWriteOp> {
+ static void generateForLoopBody(OpBuilder &builder, Location loc,
+ TransferWriteOp xferOp, Value iv,
+ ValueRange /*loopState*/) {
+ SmallVector<Value, 8> indices;
+ auto dim = get1dMemrefIndices(xferOp, iv, indices);
+ auto ivI32 = std_index_cast(IntegerType::get(builder.getContext(), 32), iv);
+
+ // Nothing to do in case of out-of-bounds access.
+ generateInBoundsCheck(
+ xferOp, iv, builder, dim,
+ /*inBoundsCase=*/[&](OpBuilder & /*b*/, Location loc) {
+ auto val = vector_extract_element(xferOp.vector(), ivI32.value);
+ memref_store(val, xferOp.source(), indices);
+ });
+ builder.create<scf::YieldOp>(loc);
+ }
- // Fall back to a loop if the fastest varying stride is not 1 or it is
- // permuted.
+ static Value initialLoopState(TransferWriteOp xferOp) { return Value(); }
+};
+
+/// Return true if the last dimension of the MemRefType has unit stride.
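+/// E.g. (illustrative only):
+/// ```
+/// memref<?x4xf32>                                  // strides [4, 1] => true
+/// memref<?x4xf32, affine_map<(d0, d1) -> (d0 * 8 + d1 * 2)>>
+///                                                  // strides [8, 2] => false
+/// ```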
+static bool isLastMemrefDimUnitStride(MemRefType type) {
int64_t offset;
SmallVector<int64_t, 4> strides;
- auto successStrides = getStridesAndOffset(memRefType, strides, offset);
- if (succeeded(successStrides) && strides.back() == 1 &&
- transfer.permutation_map().isMinorIdentity()) {
- // If > 1D, emit a bunch of loops around 1-D vector transfers.
- if (transfer.getVectorType().getRank() > 1)
- return NDTransferOpHelper<TransferWriteOp>(rewriter, transfer, options)
- .doReplace();
- // If 1-D this is now handled by the target-specific lowering.
- if (transfer.getVectorType().getRank() == 1)
+ auto successStrides = getStridesAndOffset(type, strides, offset);
+ return succeeded(successStrides) && strides.back() == 1;
+}
+
+/// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
+/// necessary in cases where a 1D vector transfer op cannot be lowered into
+/// vector load/stores due to non-unit strides or broadcasts:
+///
+/// * Transfer dimension is not the last memref dimension
+/// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
+/// * Memref has a layout map with non-unit stride on the last dimension
+///
+/// This pattern generates IR as follows:
+///
+/// 1. Generate a for loop iterating over each vector element.
+///    2. Inside the loop, generate an InsertElementOp or ExtractElementOp,
+/// depending on OpTy.
+///
+/// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
+/// can be generated instead of TransferOp1dConversion. Add such a pattern
+/// to ConvertVectorToLLVM.
+///
+/// E.g.:
+/// ```
+/// vector.transfer_write %vec, %A[%a, %b]
+/// {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
+/// : vector<9xf32>, memref<?x?xf32>
+/// ```
+/// Is rewritten to approximately the following pseudo-IR:
+/// ```
+/// for i = 0 to 9 {
+/// %t = vector.extractelement %vec[i] : vector<9xf32>
+/// memref.store %t, %arg0[%a + i, %b] : memref<?x?xf32>
+/// }
+/// ```
+template <typename OpTy>
+struct TransferOp1dConversion : public OpRewritePattern<OpTy> {
+ using OpRewritePattern<OpTy>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(OpTy xferOp,
+ PatternRewriter &rewriter) const override {
+ ScopedContext scope(rewriter, xferOp.getLoc());
+ auto map = xferOp.permutation_map();
+ auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
+
+ if (!memRefType)
return failure();
+ if (xferOp.getVectorType().getRank() != 1)
+ return failure();
+ if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
+ return failure(); // Handled by ConvertVectorToLLVM
+
+ // Loop bounds, step, state...
+ auto vecType = xferOp.getVectorType();
+ auto lb = std_constant_index(0);
+ auto ub = std_constant_index(vecType.getDimSize(0));
+ auto step = std_constant_index(1);
+ auto loopState = Strategy1d<OpTy>::initialLoopState(xferOp);
+
+ // Generate for loop.
+ rewriter.replaceOpWithNewOp<scf::ForOp>(
+ xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
+ [&](OpBuilder &builder, Location loc, Value iv, ValueRange loopState) {
+ ScopedContext nestedScope(builder, loc);
+ Strategy1d<OpTy>::generateForLoopBody(builder, loc, xferOp, iv,
+ loopState);
+ });
+
+ return success();
}
+};
- // 1. Setup all the captures.
- ScopedContext scope(rewriter, transfer.getLoc());
- MemRefIndexedValue remote(transfer.source());
- MemRefBoundsCapture memRefBoundsCapture(transfer.source());
- Value vectorValue(transfer.vector());
- VectorBoundsCapture vectorBoundsCapture(transfer.vector());
- int coalescedIdx = computeCoalescedIndex(transfer);
- // Swap the vectorBoundsCapture which will reorder loop bounds.
- if (coalescedIdx >= 0)
- vectorBoundsCapture.swapRanges(vectorBoundsCapture.rank() - 1,
- coalescedIdx);
-
- auto lbs = vectorBoundsCapture.getLbs();
- auto ubs = vectorBoundsCapture.getUbs();
- SmallVector<Value, 8> steps;
- steps.reserve(vectorBoundsCapture.getSteps().size());
- for (auto step : vectorBoundsCapture.getSteps())
- steps.push_back(std_constant_index(step));
-
- // 2. Emit alloc-store-copy-dealloc.
- Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer);
- MemRefIndexedValue local(tmp);
- Value vec = vector_type_cast(tmp);
- memref_store(vectorValue, vec);
- loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
- auto ivsStorage = llvm::to_vector<8>(loopIvs);
- // Swap the ivsStorage which will reorder memory accesses.
- if (coalescedIdx >= 0)
- std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]);
-
- ArrayRef<Value> ivs(ivsStorage);
- Value pos =
- std_index_cast(IntegerType::get(op->getContext(), 32), ivs.back());
- auto storeValue = [&](ArrayRef<Value> indices) {
- Value scalar = vector_extract_element(local(ivs.drop_back()), pos);
- remote(indices) = scalar;
- };
- emitWithBoundsChecks(
- rewriter, cast<VectorTransferOpInterface>(transfer.getOperation()), ivs,
- memRefBoundsCapture, storeValue);
- });
-
- // 3. Erase.
- rewriter.eraseOp(op);
- return success();
-}
+} // namespace
+
+namespace mlir {
void populateVectorToSCFConversionPatterns(
RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
- patterns.add<VectorTransferRewriter<vector::TransferReadOp>,
- VectorTransferRewriter<vector::TransferWriteOp>>(
- options, patterns.getContext());
+ if (options.unroll) {
+ patterns.add<UnrollTransferReadConversion, UnrollTransferWriteConversion>(
+ patterns.getContext());
+ } else {
+ patterns.add<PrepareTransferReadConversion, PrepareTransferWriteConversion,
+ TransferOpConversion<TransferReadOp>,
+ TransferOpConversion<TransferWriteOp>>(patterns.getContext());
+ }
+
+ if (kTargetRank == 1) {
+ patterns.add<TransferOp1dConversion<TransferReadOp>,
+ TransferOp1dConversion<TransferWriteOp>>(
+ patterns.getContext());
+ }
}
} // namespace mlir
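
With the progressive entry points folded into populateVectorToSCFConversionPatterns, C++ clients pick between the unrolled and the progressive lowering through VectorTransferToSCFOptions. The sketch below is illustrative only and not part of this commit: the entry point, the options struct, and its `unroll` member are taken from the diff above, while the helper name and the exact set of includes are assumptions.

```
#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

/// Hypothetical helper: lower all vector transfer ops in `func` to SCF.
/// With `fullUnroll` set, the Unroll* patterns are populated; otherwise the
/// Prepare*/TransferOpConversion patterns (plus the 1D scalar-loop fallback
/// when kTargetRank == 1) are used.
static LogicalResult lowerTransfersToSCF(FuncOp func, bool fullUnroll) {
  RewritePatternSet patterns(func.getContext());
  VectorTransferToSCFOptions options;
  options.unroll = fullUnroll;
  populateVectorToSCFConversionPatterns(patterns, options);
  return applyPatternsAndFoldGreedily(func, std::move(patterns));
}
```

This mirrors what the updated RUN lines exercise from the command line via -convert-vector-to-scf and -convert-vector-to-scf=full-unroll=true.
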
diff --git a/mlir/test/Conversion/VectorToSCF/progressive-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/progressive-vector-to-loops.mlir
deleted file mode 100644
index 75ee49d75fec8..0000000000000
--- a/mlir/test/Conversion/VectorToSCF/progressive-vector-to-loops.mlir
+++ /dev/null
@@ -1,467 +0,0 @@
-// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -split-input-file -allow-unregistered-dialect | FileCheck %s
-// RUN: mlir-opt %s -test-unrolled-progressive-convert-vector-to-scf -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL
-
-// CHECK-LABEL: func @materialize_read_1d() {
-func @materialize_read_1d() {
- %f0 = constant 0.0: f32
- %A = memref.alloc () : memref<7x42xf32>
- affine.for %i0 = 0 to 7 step 4 {
- affine.for %i1 = 0 to 42 step 4 {
- %f1 = vector.transfer_read %A[%i0, %i1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
- %ip1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i1)
- %f2 = vector.transfer_read %A[%i0, %ip1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
- %ip2 = affine.apply affine_map<(d0) -> (d0 + 2)> (%i1)
- %f3 = vector.transfer_read %A[%i0, %ip2], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
- %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1)
- %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
- // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds.
- // CHECK: scf.if
- // CHECK-NEXT: memref.load
- // CHECK-NEXT: vector.insertelement
- // CHECK-NEXT: scf.yield
- // CHECK-NEXT: else
- // CHECK-NEXT: scf.yield
- // Add a dummy use to prevent dead code elimination from removing transfer
- // read ops.
- "dummy_use"(%f1, %f2, %f3, %f4) : (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) -> ()
- }
- }
- return
-}
-
-// -----
-
-// CHECK-LABEL: func @materialize_read_1d_partially_specialized
-func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
- %f0 = constant 0.0: f32
- %A = memref.alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
- affine.for %i0 = 0 to 7 {
- affine.for %i1 = 0 to %dyn1 {
- affine.for %i2 = 0 to %dyn2 {
- affine.for %i3 = 0 to 42 step 2 {
- affine.for %i4 = 0 to %dyn4 {
- %f1 = vector.transfer_read %A[%i0, %i1, %i2, %i3, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
- %i3p1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i3)
- %f2 = vector.transfer_read %A[%i0, %i1, %i2, %i3p1, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
- // Add a dummy use to prevent dead code elimination from removing
- // transfer read ops.
- "dummy_use"(%f1, %f2) : (vector<4xf32>, vector<4xf32>) -> ()
- }
- }
- }
- }
- }
- // CHECK: %[[tensor:[0-9]+]] = memref.alloc
- // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c0
- // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c3
- return
-}
-
-// -----
-
-// CHECK: #[[$ADD:map.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-
-// CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
- %f0 = constant 0.0: f32
- // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
- // CHECK-DAG: %[[C0:.*]] = constant 0 : index
- // CHECK-DAG: %[[C1:.*]] = constant 1 : index
- // CHECK-DAG: %[[C3:.*]] = constant 3 : index
- // CHECK-DAG: %[[C4:.*]] = constant 4 : index
- // CHECK-DAG: %[[C5:.*]] = constant 5 : index
- // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
- // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
- // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
- // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
- // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
- // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
- // CHECK: scf.if
- // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
- // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
- // CHECK: %[[VEC:.*]] = scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {{.*}} -> (vector<3xf32>) {
- // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
- // CHECK: %[[VIDX:.*]] = index_cast %[[I6]]
- // CHECK: scf.if {{.*}} -> (vector<3xf32>) {
- // CHECK-NEXT: %[[SCAL:.*]] = memref.load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
- // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %{{.*}}[%[[VIDX]] : i32] : vector<3xf32>
- // CHECK-NEXT: scf.yield
- // CHECK-NEXT: } else {
- // CHECK-NEXT: scf.yield
- // CHECK-NEXT: }
- // CHECK-NEXT: scf.yield
- // CHECK-NEXT: }
- // CHECK-NEXT: memref.store %[[VEC]], {{.*}} : memref<5x4xvector<3xf32>>
- // CHECK-NEXT: }
- // CHECK-NEXT: } else {
- // CHECK-NEXT: memref.store {{.*}} : memref<5xvector<4x3xf32>>
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: %[[LD:.*]] = memref.load %[[ALLOC]][] : memref<vector<5x4x3xf32>>
- // CHECK-NEXT: "dummy_use"(%[[LD]]) : (vector<5x4x3xf32>) -> ()
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: return
- // CHECK-NEXT:}
-
- // Check that I0 + I4 (of size 3) read from first index load(L0, ...) and write into last index store(..., I4)
- // Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...)
- // Other dimensions are just accessed with I1, I2 resp.
- %A = memref.alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
- affine.for %i0 = 0 to %M step 3 {
- affine.for %i1 = 0 to %N {
- affine.for %i2 = 0 to %O {
- affine.for %i3 = 0 to %P step 5 {
- %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
- // Add a dummy use to prevent dead code elimination from removing
- // transfer read ops.
- "dummy_use"(%f) : (vector<5x4x3xf32>) -> ()
- }
- }
- }
- }
- return
-}
-
-// -----
-
-// CHECK: #[[$ADD:map.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-
-// CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
- // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
- // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
- // CHECK-DAG: %[[C0:.*]] = constant 0 : index
- // CHECK-DAG: %[[C1:.*]] = constant 1 : index
- // CHECK-DAG: %[[C3:.*]] = constant 3 : index
- // CHECK-DAG: %[[C4:.*]] = constant 4 : index
- // CHECK-DAG: %[[C5:.*]] = constant 5 : index
- // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
- // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
- // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
- // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
- // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
- // CHECK: memref.store %{{.*}}, %[[ALLOC]][] : memref<vector<5x4x3xf32>>
- // CHECK: %[[VECTOR_VIEW1:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<5x4x3xf32>> to memref<5xvector<4x3xf32>>
- // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
- // CHECK: scf.if
- // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
- // CHECK: %[[VECTOR_VIEW2:.*]] = vector.type_cast %[[VECTOR_VIEW1]] : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
- // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
- // CHECK: scf.if
- // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
- // CHECK: %[[VEC:.*]] = memref.load %[[VECTOR_VIEW2]][%[[I4]], %[[I5]]] : memref<5x4xvector<3xf32>>
- // CHECK: scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
- // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
- // CHECK: %[[VIDX:.*]] = index_cast %[[I6]]
- // CHECK: scf.if
- // CHECK: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
- // CHECK: memref.store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: }
- // CHECK: return
-
- // Check that I0 + I4 (of size 3) read from last index load(..., I4) and write into first index store(S0, ...)
- // Check that I1 + I5 (of size 4) read from second index load(..., I5, ...) and write into second index store(..., S1, ...)
- // Check that I3 + I6 (of size 5) read from first index load(I6, ...) and write into last index store(..., S3)
- // Other dimension is just accessed with I2.
- %A = memref.alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
- %f1 = constant dense<1.000000e+00> : vector<5x4x3xf32>
- affine.for %i0 = 0 to %M step 3 {
- affine.for %i1 = 0 to %N step 4 {
- affine.for %i2 = 0 to %O {
- affine.for %i3 = 0 to %P step 5 {
- vector.transfer_write %f1, %A[%i0, %i1, %i2, %i3] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d1, d0)>} : vector<5x4x3xf32>, memref<?x?x?x?xf32>
- }
- }
- }
- }
- return
-}
-
-// -----
-
-// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
-
-// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
-// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
-
-
-// CHECK-LABEL: transfer_read_progressive(
-// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index
-
-// FULL-UNROLL-LABEL: transfer_read_progressive(
-// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index
-
-func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x15xf32> {
- %f7 = constant 7.0: f32
- // CHECK-DAG: %[[C7:.*]] = constant 7.000000e+00 : f32
- // CHECK-DAG: %[[C0:.*]] = constant 0 : index
- // CHECK-DAG: %[[C1:.*]] = constant 1 : index
- // CHECK-DAG: %[[C3:.*]] = constant 3 : index
- // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
- // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
- // CHECK: %[[alloc_casted:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
- // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
- // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK: %[[cond1:.*]] = cmpi sgt, %[[dim]], %[[add]] : index
- // CHECK: scf.if %[[cond1]] {
- // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
- // CHECK: memref.store %[[vec_1d]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: } else {
- // CHECK: store %[[splat]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: }
- // CHECK: }
- // CHECK: %[[cst:.*]] = memref.load %[[alloc]][] : memref<vector<3x15xf32>>
-
- // FULL-UNROLL: %[[C7:.*]] = constant 7.000000e+00 : f32
- // FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
- // FULL-UNROLL: %[[C0:.*]] = constant 0 : index
- // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // FULL-UNROLL: cmpi sgt, %[[DIM]], %[[base]] : index
- // FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
- // FULL-UNROLL: vector.transfer_read %[[A]][%[[base]], %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: } else {
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: }
- // FULL-UNROLL: affine.apply #[[$MAP1]]()[%[[base]]]
- // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
- // FULL-UNROLL: %[[VEC2:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
- // FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: } else {
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: }
- // FULL-UNROLL: affine.apply #[[$MAP2]]()[%[[base]]]
- // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
- // FULL-UNROLL: %[[VEC3:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
- // FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: } else {
- // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
- // FULL-UNROLL: }
-
- %f = vector.transfer_read %A[%base, %base], %f7 :
- memref<?x?xf32>, vector<3x15xf32>
-
- return %f: vector<3x15xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
-
-// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
-// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
-
-// CHECK-LABEL: transfer_write_progressive(
-// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
-// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
-// FULL-UNROLL-LABEL: transfer_write_progressive(
-// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
-// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
-func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
- // CHECK-DAG: %[[C0:.*]] = constant 0 : index
- // CHECK-DAG: %[[C1:.*]] = constant 1 : index
- // CHECK-DAG: %[[C3:.*]] = constant 3 : index
- // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
- // CHECK: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
- // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
- // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
- // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK: %[[cmp:.*]] = cmpi sgt, %[[dim]], %[[add]] : index
- // CHECK: scf.if %[[cmp]] {
- // CHECK: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
- // CHECK: }
- // CHECK: }
-
- // FULL-UNROLL: %[[C0:.*]] = constant 0 : index
- // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // FULL-UNROLL: %[[CMP0:.*]] = cmpi sgt, %[[DIM]], %[[base]] : index
- // FULL-UNROLL: scf.if %[[CMP0]] {
- // FULL-UNROLL: %[[V0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[V0]], %[[A]][%[[base]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
- // FULL-UNROLL: }
- // FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
- // FULL-UNROLL: %[[CMP1:.*]] = cmpi sgt, %{{.*}}, %[[I1]] : index
- // FULL-UNROLL: scf.if %[[CMP1]] {
- // FULL-UNROLL: %[[V1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[V1]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
- // FULL-UNROLL: }
- // FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
- // FULL-UNROLL: %[[CMP2:.*]] = cmpi sgt, %{{.*}}, %[[I2]] : index
- // FULL-UNROLL: scf.if %[[CMP2]] {
- // FULL-UNROLL: %[[V2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[V2]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
- // FULL-UNROLL: }
-
- vector.transfer_write %vec, %A[%base, %base] :
- vector<3x15xf32>, memref<?x?xf32>
- return
-}
-
-// -----
-
-// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
-
-// FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
-// FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
-
-// CHECK-LABEL: transfer_write_progressive_inbounds(
-// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
-// CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
-// FULL-UNROLL-LABEL: transfer_write_progressive_inbounds(
-// FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
-// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
-// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
-func @transfer_write_progressive_inbounds(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
- // CHECK-NOT: scf.if
- // CHECK-DAG: %[[C0:.*]] = constant 0 : index
- // CHECK-DAG: %[[C3:.*]] = constant 3 : index
- // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
- // CHECK-NEXT: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
- // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
- // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
- // CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
-
- // FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[VEC0]], %[[A]][%[[base]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
- // FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
- // FULL-UNROLL: %[[VEC1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %2, %[[A]][%[[I1]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
- // FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
- // FULL-UNROLL: %[[VEC2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[VEC2:.*]], %[[A]][%[[I2]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
- vector.transfer_write %vec, %A[%base, %base] {in_bounds = [true, true]} :
- vector<3x15xf32>, memref<?x?xf32>
- return
-}
-
-// -----
-
-// FULL-UNROLL-LABEL: transfer_read_simple
-func @transfer_read_simple(%A : memref<2x2xf32>) -> vector<2x2xf32> {
- %c0 = constant 0 : index
- %f0 = constant 0.0 : f32
- // FULL-UNROLL-DAG: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<2x2xf32>
- // FULL-UNROLL-DAG: %[[C0:.*]] = constant 0 : index
- // FULL-UNROLL-DAG: %[[C1:.*]] = constant 1 : index
- // FULL-UNROLL: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]]
- // FULL-UNROLL: %[[RES0:.*]] = vector.insert %[[V0]], %[[VC0]] [0] : vector<2xf32> into vector<2x2xf32>
- // FULL-UNROLL: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C1]], %[[C0]]]
- // FULL-UNROLL: %[[RES1:.*]] = vector.insert %[[V1]], %[[RES0]] [1] : vector<2xf32> into vector<2x2xf32>
- %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<2x2xf32>, vector<2x2xf32>
- return %0 : vector<2x2xf32>
-}
-
-func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32> {
- %c0 = constant 0 : index
- %f0 = constant 0.0 : f32
- %0 = vector.transfer_read %A[%c0, %c0, %c0, %c0], %f0
- { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
- : memref<?x?x?x?xf32>, vector<3x3xf32>
- return %0 : vector<3x3xf32>
-}
-
-// CHECK-LABEL: transfer_read_minor_identity(
-// CHECK-SAME: %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
-// CHECK-DAG: %[[c0:.*]] = constant 0 : index
-// CHECK-DAG: %[[c1:.*]] = constant 1 : index
-// CHECK-DAG: %[[c2:.*]] = constant 2 : index
-// CHECK-DAG: %[[c3:.*]] = constant 3 : index
-// CHECK-DAG: %[[f0:.*]] = constant 0.000000e+00 : f32
-// CHECK-DAG: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
-// CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
-// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
-// CHECK: scf.for %[[arg1:.*]] = %[[c0]] to %[[c3]]
-// CHECK: %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECK: %[[cmp:.*]] = cmpi sgt, %[[d]], %[[arg1]] : index
-// CHECK: scf.if %[[cmp]] {
-// CHECK: %[[tr:.*]] = vector.transfer_read %[[A]][%c0, %c0, %[[arg1]], %c0], %[[f0]] : memref<?x?x?x?xf32>, vector<3xf32>
-// CHECK: memref.store %[[tr]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
-// CHECK: } else {
-// CHECK: memref.store %[[cst0]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
-// CHECK: }
-// CHECK: }
-// CHECK: %[[ret:.*]] = memref.load %[[m]][] : memref<vector<3x3xf32>>
-// CHECK: return %[[ret]] : vector<3x3xf32>
-
-func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf32>) {
- %c0 = constant 0 : index
- %f0 = constant 0.0 : f32
- vector.transfer_write %A, %B[%c0, %c0, %c0, %c0]
- { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
- : vector<3x3xf32>, memref<?x?x?x?xf32>
- return
-}
-
-// CHECK-LABEL: transfer_write_minor_identity(
-// CHECK-SAME: %[[A:.*]]: vector<3x3xf32>,
-// CHECK-SAME: %[[B:.*]]: memref<?x?x?x?xf32>)
-// CHECK-DAG: %[[c0:.*]] = constant 0 : index
-// CHECK-DAG: %[[c1:.*]] = constant 1 : index
-// CHECK-DAG: %[[c2:.*]] = constant 2 : index
-// CHECK-DAG: %[[c3:.*]] = constant 3 : index
-// CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
-// CHECK: memref.store %[[A]], %[[m]][] : memref<vector<3x3xf32>>
-// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
-// CHECK: scf.for %[[arg2:.*]] = %[[c0]] to %[[c3]]
-// CHECK: %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECK: %[[cmp:.*]] = cmpi sgt, %[[d]], %[[arg2]] : index
-// CHECK: scf.if %[[cmp]] {
-// CHECK: %[[tmp:.*]] = memref.load %[[cast]][%[[arg2]]] : memref<3xvector<3xf32>>
-// CHECK: vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref<?x?x?x?xf32>
-// CHECK: }
-// CHECK: }
-// CHECK: return
-
-
-// -----
-
-func @transfer_read_strided(%A : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) -> vector<4xf32> {
- %c0 = constant 0 : index
- %f0 = constant 0.0 : f32
- %0 = vector.transfer_read %A[%c0, %c0], %f0
- : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>, vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: transfer_read_strided(
-// CHECK: scf.for
-// CHECK: memref.load
-
-func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) {
- %c0 = constant 0 : index
- vector.transfer_write %A, %B[%c0, %c0] :
- vector<4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>
- return
-}
-
-// CHECK-LABEL: transfer_write_strided(
-// CHECK: scf.for
-// CHECK: store
-
diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
index f90d20a518a65..bd74ff05c2c32 100644
--- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -test-unrolled-progressive-convert-vector-to-scf -split-input-file -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -split-input-file -allow-unregistered-dialect | FileCheck %s
// CHECK-LABEL: func @transfer_read_inbounds
func @transfer_read_inbounds(%A : memref<?x?x?xf32>) -> (vector<2x3x4xf32>) {
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
index d84f84c5ade6e..3dce006ab7833 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -18,10 +18,9 @@ func @materialize_read_1d() {
// CHECK: scf.if
// CHECK-NEXT: memref.load
// CHECK-NEXT: vector.insertelement
- // CHECK-NEXT: store
+ // CHECK-NEXT: scf.yield
// CHECK-NEXT: else
- // CHECK-NEXT: vector.insertelement
- // CHECK-NEXT: store
+ // CHECK-NEXT: scf.yield
// Add a dummy use to prevent dead code elimination from removing transfer
// read ops.
"dummy_use"(%f1, %f2, %f3, %f4) : (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) -> ()
@@ -65,37 +64,40 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
%f0 = constant 0.0: f32
- // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<5x4xvector<3xf32>>
+ // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
- // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
+ // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
- // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
- // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
- // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
- // CHECK: %[[VIDX:.*]] = index_cast %[[I4]]
- // CHECK: %[[VEC:.*]] = memref.load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
- // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
- // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
- // CHECK-NEXT: scf.if
- // CHECK-NEXT: %[[SCAL:.*]] = memref.load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
- // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
- // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
- // CHECK-NEXT: } else {
- // CHECK-NEXT: %[[CVEC:.*]] = vector.insertelement
- // CHECK-NEXT: store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
+ // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
+ // CHECK: scf.if
+ // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
+ // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
+ // CHECK: %[[VEC:.*]] = scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {{.*}} -> (vector<3xf32>) {
+ // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
+ // CHECK: %[[VIDX:.*]] = index_cast %[[I6]]
+ // CHECK: scf.if {{.*}} -> (vector<3xf32>) {
+ // CHECK-NEXT: %[[SCAL:.*]] = memref.load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
+ // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %{{.*}}[%[[VIDX]] : i32] : vector<3xf32>
+ // CHECK-NEXT: scf.yield
+ // CHECK-NEXT: } else {
+ // CHECK-NEXT: scf.yield
+ // CHECK-NEXT: }
+ // CHECK-NEXT: scf.yield
// CHECK-NEXT: }
+ // CHECK-NEXT: memref.store %[[VEC]], {{.*}} : memref<5x4xvector<3xf32>>
// CHECK-NEXT: }
+ // CHECK-NEXT: } else {
+ // CHECK-NEXT: memref.store {{.*}} : memref<5xvector<4x3xf32>>
// CHECK-NEXT: }
// CHECK-NEXT: }
- // CHECK-NEXT: %[[ALLOC_CAST:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4xvector<3xf32>> to memref<vector<5x4x3xf32>>
- // CHECK-NEXT: %[[LD:.*]] = memref.load %[[ALLOC_CAST]][] : memref<vector<5x4x3xf32>>
+ // CHECK-NEXT: %[[LD:.*]] = memref.load %[[ALLOC]][] : memref<vector<5x4x3xf32>>
// CHECK-NEXT: "dummy_use"(%[[LD]]) : (vector<5x4x3xf32>) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -129,42 +131,46 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
- // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<5x4xvector<3xf32>>
+ // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
// CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32>
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
// CHECK-DAG: %[[C3:.*]] = constant 3 : index
// CHECK-DAG: %[[C4:.*]] = constant 4 : index
// CHECK-DAG: %[[C5:.*]] = constant 5 : index
- // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
- // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
- // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
- // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
- // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
- // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4xvector<3xf32>>
- // CHECK: store %{{.*}}, {{.*}} : memref<vector<5x4x3xf32>>
- // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
- // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
- // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
- // CHECK: %[[VIDX:.*]] = index_cast %[[I4]]
- // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]])
- // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
- // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]])
- // CHECK-NEXT: scf.if
- // CHECK-NEXT: %[[VEC:.*]] = memref.load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>>
- // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
- // CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: }
- // CHECK-NEXT: return
- // CHECK-NEXT:}
- //
+ // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
+ // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
+ // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
+ // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
+ // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
+ // CHECK: memref.store %{{.*}}, %[[ALLOC]][] : memref<vector<5x4x3xf32>>
+ // CHECK: %[[VECTOR_VIEW1:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<5x4x3xf32>> to memref<5xvector<4x3xf32>>
+ // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
+ // CHECK: scf.if
+ // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
+ // CHECK: %[[VECTOR_VIEW2:.*]] = vector.type_cast %[[VECTOR_VIEW1]] : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
+ // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
+ // CHECK: scf.if
+ // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
+ // CHECK: %[[VEC:.*]] = memref.load %[[VECTOR_VIEW2]][%[[I4]], %[[I5]]] : memref<5x4xvector<3xf32>>
+ // CHECK: scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
+ // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
+ // CHECK: %[[VIDX:.*]] = index_cast %[[I6]]
+ // CHECK: scf.if
+ // CHECK: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32>
+ // CHECK: memref.store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: }
+ // CHECK: return
+
// Check that I0 + I4 (of size 3) read from last index load(..., I4) and write into first index store(S0, ...)
// Check that I1 + I5 (of size 4) read from second index load(..., I5, ...) and write into second index store(..., S1, ...)
// Check that I3 + I6 (of size 5) read from first index load(I6, ...) and write into last index store(..., S3)
@@ -203,53 +209,52 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
%f7 = constant 7.0: f32
// CHECK-DAG: %[[C7:.*]] = constant 7.000000e+00 : f32
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
+ // CHECK-DAG: %[[C1:.*]] = constant 1 : index
+ // CHECK-DAG: %[[C3:.*]] = constant 3 : index
// CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
- // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>>
- // CHECK-DAG: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // CHECK: affine.for %[[I:.*]] = 0 to 3 {
- // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK: %[[cond1:.*]] = cmpi slt, %[[add]], %[[dim]] : index
- // CHECK: scf.if %[[cond1]] {
- // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
- // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: } else {
- // CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: }
- // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
- // CHECK: %[[cst:.*]] = memref.load %[[vmemref]][] : memref<vector<3x15xf32>>
+ // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
+ // CHECK: %[[alloc_casted:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
+ // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
+ // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
+ // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
+ // CHECK: %[[cond1:.*]] = cmpi sgt, %[[dim]], %[[add]] : index
+ // CHECK: scf.if %[[cond1]] {
+ // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
+ // CHECK: memref.store %[[vec_1d]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
+ // CHECK: } else {
+ // CHECK: store %[[splat]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
+ // CHECK: }
+ // CHECK: }
+ // CHECK: %[[cst:.*]] = memref.load %[[alloc]][] : memref<vector<3x15xf32>>
// FULL-UNROLL: %[[C7:.*]] = constant 7.000000e+00 : f32
// FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32>
// FULL-UNROLL: %[[C0:.*]] = constant 0 : index
- // FULL-UNROLL: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32>
// FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // FULL-UNROLL: cmpi slt, %[[base]], %[[DIM]] : index
+ // FULL-UNROLL: cmpi sgt, %[[DIM]], %[[base]] : index
// FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%[[base]], %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
// FULL-UNROLL: affine.apply #[[$MAP1]]()[%[[base]]]
- // FULL-UNROLL: cmpi slt, %{{.*}}, %[[DIM]] : index
+ // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
// FULL-UNROLL: %[[VEC2:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
// FULL-UNROLL: affine.apply #[[$MAP2]]()[%[[base]]]
- // FULL-UNROLL: cmpi slt, %{{.*}}, %[[DIM]] : index
+ // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
// FULL-UNROLL: %[[VEC3:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
// FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
// FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: } else {
- // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
// FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
// FULL-UNROLL: }
@@ -275,37 +280,40 @@ func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
- // CHECK: %[[C0:.*]] = constant 0 : index
- // CHECK: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>>
- // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
- // CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
- // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // CHECK: affine.for %[[I:.*]] = 0 to 3 {
- // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK: %[[cmp:.*]] = cmpi slt, %[[add]], %[[dim]] : index
- // CHECK: scf.if %[[cmp]] {
- // CHECK: %[[vec_1d:.*]] = memref.load %0[%[[I]]] : memref<3xvector<15xf32>>
- // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
- // CHECK: }
+ // CHECK-DAG: %[[C0:.*]] = constant 0 : index
+ // CHECK-DAG: %[[C1:.*]] = constant 1 : index
+ // CHECK-DAG: %[[C3:.*]] = constant 3 : index
+ // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
+ // CHECK: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
+ // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
+ // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
+ // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
+ // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
+ // CHECK: %[[cmp:.*]] = cmpi sgt, %[[dim]], %[[add]] : index
+ // CHECK: scf.if %[[cmp]] {
+ // CHECK: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
+ // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
+ // CHECK: }
+ // CHECK: }
// FULL-UNROLL: %[[C0:.*]] = constant 0 : index
// FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
- // FULL-UNROLL: %[[CMP0:.*]] = cmpi slt, %[[base]], %[[DIM]] : index
+ // FULL-UNROLL: %[[CMP0:.*]] = cmpi sgt, %[[DIM]], %[[base]] : index
// FULL-UNROLL: scf.if %[[CMP0]] {
// FULL-UNROLL: %[[V0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
// FULL-UNROLL: vector.transfer_write %[[V0]], %[[A]][%[[base]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
// FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
- // FULL-UNROLL: %[[CMP1:.*]] = cmpi slt, %[[I1]], %[[DIM]] : index
+ // FULL-UNROLL: %[[CMP1:.*]] = cmpi sgt, %{{.*}}, %[[I1]] : index
// FULL-UNROLL: scf.if %[[CMP1]] {
// FULL-UNROLL: %[[V1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[V1]], %[[A]][%[[I1]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
+ // FULL-UNROLL: vector.transfer_write %[[V1]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
// FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
- // FULL-UNROLL: %[[CMP2:.*]] = cmpi slt, %[[I2]], %[[DIM]] : index
+ // FULL-UNROLL: %[[CMP2:.*]] = cmpi sgt, %{{.*}}, %[[I2]] : index
// FULL-UNROLL: scf.if %[[CMP2]] {
// FULL-UNROLL: %[[V2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
- // FULL-UNROLL: vector.transfer_write %[[V2]], %[[A]][%[[I2]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
+ // FULL-UNROLL: vector.transfer_write %[[V2]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: }
vector.transfer_write %vec, %A[%base, %base] :
@@ -330,12 +338,14 @@ func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vecto
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive_inbounds(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK-NOT: scf.if
- // CHECK-NEXT: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>>
- // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
- // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
- // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 {
+ // CHECK-DAG: %[[C0:.*]] = constant 0 : index
+ // CHECK-DAG: %[[C3:.*]] = constant 3 : index
+ // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
+ // CHECK-NEXT: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
+ // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
+ // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
// CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
- // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %0[%[[I]]] : memref<3xvector<15xf32>>
+ // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
// CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
// FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
@@ -378,25 +388,27 @@ func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32>
}
// CHECK-LABEL: transfer_read_minor_identity(
-// CHECK-SAME: %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
-// CHECK-DAG: %[[c0:.*]] = constant 0 : index
-// CHECK-DAG: %[[f0:.*]] = constant 0.000000e+00 : f32
-// CHECK-DAG: %[[c2:.*]] = constant 2 : index
-// CHECK-DAG: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
-// CHECK: %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>>
-// CHECK: %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECK: affine.for %[[arg1:.*]] = 0 to 3 {
-// CHECK: %[[cmp:.*]] = cmpi slt, %[[arg1]], %[[d]] : index
-// CHECK: scf.if %[[cmp]] {
-// CHECK: %[[tr:.*]] = vector.transfer_read %[[A]][%c0, %c0, %[[arg1]], %c0], %[[f0]] : memref<?x?x?x?xf32>, vector<3xf32>
-// CHECK: store %[[tr]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
-// CHECK: } else {
-// CHECK: store %[[cst0]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
-// CHECK: }
-// CHECK: }
-// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
-// CHECK: %[[ret:.*]] = memref.load %[[cast]][] : memref<vector<3x3xf32>>
-// CHECK: return %[[ret]] : vector<3x3xf32>
+// CHECK-SAME: %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
+// CHECK-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-DAG: %[[c1:.*]] = constant 1 : index
+// CHECK-DAG: %[[c2:.*]] = constant 2 : index
+// CHECK-DAG: %[[c3:.*]] = constant 3 : index
+// CHECK-DAG: %[[f0:.*]] = constant 0.000000e+00 : f32
+// CHECK-DAG: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
+// CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
+// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
+// CHECK: scf.for %[[arg1:.*]] = %[[c0]] to %[[c3]]
+// CHECK: %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECK: %[[cmp:.*]] = cmpi sgt, %[[d]], %[[arg1]] : index
+// CHECK: scf.if %[[cmp]] {
+// CHECK: %[[tr:.*]] = vector.transfer_read %[[A]][%c0, %c0, %[[arg1]], %c0], %[[f0]] : memref<?x?x?x?xf32>, vector<3xf32>
+// CHECK: memref.store %[[tr]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
+// CHECK: } else {
+// CHECK: memref.store %[[cst0]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[ret:.*]] = memref.load %[[m]][] : memref<vector<3x3xf32>>
+// CHECK: return %[[ret]] : vector<3x3xf32>
func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf32>) {
%c0 = constant 0 : index
@@ -408,22 +420,25 @@ func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf3
}
// CHECK-LABEL: transfer_write_minor_identity(
-// CHECK-SAME: %[[A:.*]]: vector<3x3xf32>,
-// CHECK-SAME: %[[B:.*]]: memref<?x?x?x?xf32>)
-// CHECK-DAG: %[[c2:.*]] = constant 2 : index
-// CHECK-DAG: %[[c0:.*]] = constant 0 : index
-// CHECK: %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>>
-// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
-// CHECK: store %[[A]], %[[cast]][] : memref<vector<3x3xf32>>
-// CHECK: %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
-// CHECK: affine.for %[[arg2:.*]] = 0 to 3 {
-// CHECK: %[[cmp:.*]] = cmpi slt, %[[arg2]], %[[d]] : index
-// CHECK: scf.if %[[cmp]] {
-// CHECK: %[[tmp:.*]] = memref.load %[[m]][%[[arg2]]] : memref<3xvector<3xf32>>
-// CHECK: vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref<?x?x?x?xf32>
-// CHECK: }
-// CHECK: }
-// CHECK: return
+// CHECK-SAME: %[[A:.*]]: vector<3x3xf32>,
+// CHECK-SAME: %[[B:.*]]: memref<?x?x?x?xf32>)
+// CHECK-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-DAG: %[[c1:.*]] = constant 1 : index
+// CHECK-DAG: %[[c2:.*]] = constant 2 : index
+// CHECK-DAG: %[[c3:.*]] = constant 3 : index
+// CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
+// CHECK: memref.store %[[A]], %[[m]][] : memref<vector<3x3xf32>>
+// CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
+// CHECK: scf.for %[[arg2:.*]] = %[[c0]] to %[[c3]]
+// CHECK: %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECK: %[[cmp:.*]] = cmpi sgt, %[[d]], %[[arg2]] : index
+// CHECK: scf.if %[[cmp]] {
+// CHECK: %[[tmp:.*]] = memref.load %[[cast]][%[[arg2]]] : memref<3xvector<3xf32>>
+// CHECK: vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref<?x?x?x?xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: return
+
// -----
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
index d7dc9d6f1e594..20216cc6ba6e1 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-// RUN: mlir-opt %s -test-unrolled-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
index 1fc11fab85286..03cdc3dd8e329 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-// RUN: mlir-opt %s -test-unrolled-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
index 6de89a6cd6ac5..00da9278d50c7 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-// RUN: mlir-opt %s -test-unrolled-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
-// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
index bed94f02920ab..98d8132ec5ca5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
@@ -1,4 +1,9 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index 9488534d3e93d..5fdaeafe54482 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -3,7 +3,7 @@
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-// RUN: mlir-opt %s -test-progressive-convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp
index d1ac5e1b994fe..d60f32d5f6cdf 100644
--- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp
@@ -9,7 +9,6 @@
#include <type_traits>
#include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Conversion/VectorToSCF/ProgressiveVectorToSCF.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -390,23 +389,6 @@ struct TestVectorMultiReductionLoweringPatterns
}
};
-template <bool Unroll>
-struct TestProgressiveVectorToSCFLoweringPatterns
- : public PassWrapper<TestProgressiveVectorToSCFLoweringPatterns<Unroll>,
- FunctionPass> {
-  void getDependentDialects(DialectRegistry &registry) const override {
- registry.insert<memref::MemRefDialect, scf::SCFDialect, AffineDialect>();
- }
- void runOnFunction() override {
- RewritePatternSet patterns(&this->getContext());
- ProgressiveVectorTransferToSCFOptions options;
- options.unroll = Unroll;
- populateProgressiveVectorToSCFConversionPatterns(patterns, options);
- (void)applyPatternsAndFoldGreedily(this->getFunction(),
- std::move(patterns));
- }
-};
-
} // end anonymous namespace
namespace mlir {
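
The dedicated test passes go away because the same lowering is now reachable through the regular conversion entry points (and, in lit tests, through -convert-vector-to-scf with the full-unroll option). Purely as an illustration, and not part of this commit, a wrapper equivalent to the removed one could be rebuilt on top of the unified API; the sketch assumes the includes already present in TestVectorTransforms.cpp, and only populateVectorToSCFConversionPatterns, VectorTransferToSCFOptions, and its `unroll` member are taken from this change.

```
// Hypothetical replacement for the removed wrapper, built on the unified API.
template <bool Unroll>
struct TestVectorToSCFLoweringPatterns
    : public PassWrapper<TestVectorToSCFLoweringPatterns<Unroll>, FunctionPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<memref::MemRefDialect, scf::SCFDialect, AffineDialect>();
  }
  void runOnFunction() override {
    RewritePatternSet patterns(&this->getContext());
    VectorTransferToSCFOptions options;
    options.unroll = Unroll;
    populateVectorToSCFConversionPatterns(patterns, options);
    (void)applyPatternsAndFoldGreedily(this->getFunction(), std::move(patterns));
  }
};
```
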
@@ -454,19 +436,6 @@ void registerTestVectorConversions() {
"test-vector-transfer-lowering-patterns",
"Test conversion patterns to lower transfer ops to other vector ops");
- PassRegistration<TestProgressiveVectorToSCFLoweringPatterns<
- /*Unroll=*/false>>
- transferOpToSCF("test-progressive-convert-vector-to-scf",
- "Test conversion patterns to progressively lower "
- "transfer ops to SCF");
-
- PassRegistration<TestProgressiveVectorToSCFLoweringPatterns<
- /*Unroll=*/true>>
- transferOpToSCFUnrolled(
- "test-unrolled-progressive-convert-vector-to-scf",
- "Test conversion patterns to progressively lower transfer ops to SCF"
- "(unrolled variant)");
-
PassRegistration<TestVectorMultiReductionLoweringPatterns>
multiDimReductionOpLoweringPass(
"test-vector-multi-reduction-lowering-patterns",