[Mlir-commits] [mlir] 14d0735 - [MLIR][Affine][VectorOps] Utility to vectorize loop nest using strategy
Diego Caballero
llvmlistbot at llvm.org
Mon Sep 21 16:37:42 PDT 2020
Author: Diego Caballero
Date: 2020-09-21T16:28:28-07:00
New Revision: 14d0735d3453fb6403da916d7aee6a9f25af4147
URL: https://github.com/llvm/llvm-project/commit/14d0735d3453fb6403da916d7aee6a9f25af4147
DIFF: https://github.com/llvm/llvm-project/commit/14d0735d3453fb6403da916d7aee6a9f25af4147.diff
LOG: [MLIR][Affine][VectorOps] Utility to vectorize loop nest using strategy
This patch adds a utility based on SuperVectorizer to vectorize an
affine loop nest using a given vectorization strategy. This strategy allows
targeting specific loops for vectorization instead of relying on the
SuperVectorizer analysis to choose the right loops to vectorize.
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D85869
Added:
Modified:
mlir/include/mlir/Dialect/Affine/Utils.h
mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index 2e563d9e3ba4..c83955eb0891 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -14,6 +14,8 @@
#define MLIR_DIALECT_AFFINE_UTILS_H
#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
namespace mlir {
@@ -34,6 +36,47 @@ void affineParallelize(AffineForOp forOp);
/// significant code expansion in some cases.
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);
+/// Holds parameters to perform n-D vectorization on a single loop nest.
+/// For example, for the following loop nest:
+///
+/// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) {
+/// affine.for %i0 = 0 to 64 {
+/// affine.for %i1 = 0 to 128 {
+/// affine.for %i2 = 0 to 512 {
+/// %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// }
+/// }
+/// }
+/// return
+/// }
+///
+/// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim =
+/// {{i1->0}, {i2->1}}', SuperVectorizer will generate:
+///
+/// func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) {
+/// affine.for %arg2 = 0 to 64 {
+/// affine.for %arg3 = 0 to 128 step 8 {
+/// affine.for %arg4 = 0 to 512 step 4 {
+/// %cst = constant 0.000000e+00 : f32
+/// %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ...
+/// vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ...
+/// }
+/// }
+/// }
+/// return
+/// }
+// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
+struct VectorizationStrategy {
+ // Vectorization factors to apply to each target vector dimension.
+  // Each factor will be applied to a different loop.
+ SmallVector<int64_t, 8> vectorSizes;
+ // Maps each AffineForOp vectorization candidate with its vector dimension.
+ // The candidate will be vectorized using the vectorization factor in
+ // 'vectorSizes' for that dimension.
+ DenseMap<Operation *, unsigned> loopToVectorDim;
+};
+
/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
/// 'vectorSizes'. By default, each vectorization factor is applied
/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
@@ -43,6 +86,45 @@ void vectorizeAffineLoops(
llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern);
+/// External utility to vectorize affine loops from a single loop nest using an
+/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
+/// Loops are provided in a 2D vector container. The first dimension represents
+/// the nesting level relative to the loops to be vectorized. The second
+/// dimension contains the loops. This means that:
+/// a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
+/// b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
+///
+/// For example, for the following loop nest:
+///
+/// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
+/// %out0: memref<64x128x512xf32>,
+/// %out1: memref<64x128x128xf32>) {
+/// affine.for %i0 = 0 to 64 {
+/// affine.for %i1 = 0 to 128 {
+/// affine.for %i2 = 0 to 512 {
+/// %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// }
+/// affine.for %i3 = 0 to 128 {
+/// %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
+/// affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
+/// }
+/// }
+/// }
+/// return
+/// }
+///
+/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
+/// innermost loops;
+/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
+/// loops;
+/// loops = {{%i2}}, to vectorize only the first innermost loop;
+/// loops = {{%i3}}, to vectorize only the second innermost loop;
+/// loops = {{%i1}}, to vectorize only the middle loop.
+LogicalResult
+vectorizeAffineLoopNest(const std::vector<SmallVector<AffineForOp, 2>> &loops,
+ const VectorizationStrategy &strategy);
+
/// Normalize a affine.parallel op so that lower bounds are 0 and steps are 1.
/// As currently implemented, this transformation cannot fail and will return
/// early if the op is already in a normalized form.
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index ee52fe44830c..5cc65ecc7ef7 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -254,8 +254,8 @@ using namespace vector;
/// interference);
/// 3. Then, for each pattern in order:
/// a. applying iterative rewriting of the loop and the load operations in
-/// DFS postorder. Rewriting is implemented by coarsening the loops and
-/// turning load operations into opaque vector.transfer_read ops;
+/// inner-to-outer order. Rewriting is implemented by coarsening the loops
+/// and turning load operations into opaque vector.transfer_read ops;
/// b. keeping track of the load operations encountered as "roots" and the
/// store operations as "terminals";
/// c. traversing the use-def chains starting from the roots and iteratively
@@ -584,17 +584,6 @@ Vectorize::Vectorize(ArrayRef<int64_t> virtualVectorSize) {
vectorSizes = virtualVectorSize;
}
-/////// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
-/////////
-namespace {
-
-struct VectorizationStrategy {
- SmallVector<int64_t, 8> vectorSizes;
- DenseMap<Operation *, unsigned> loopToVectorDim;
-};
-
-} // end anonymous namespace
-
static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern,
unsigned patternDepth,
VectorizationStrategy *strategy) {
@@ -857,44 +846,44 @@ isVectorizableLoopPtrFactory(const DenseSet<Operation *> ¶llelLoops,
};
}
-/// Apply vectorization of `loop` according to `state`. This is only triggered
-/// if all vectorizations in `childrenMatches` have already succeeded
-/// recursively in DFS post-order.
+/// Apply vectorization of `loop` according to `state`. `loops` are processed in
+/// inner-to-outer order to ensure that all the children loops have already been
+/// vectorized before vectorizing the parent loop.
static LogicalResult
-vectorizeLoopsAndLoadsRecursively(NestedMatch oneMatch,
- VectorizationState *state) {
- auto *loopInst = oneMatch.getMatchedOperation();
- auto loop = cast<AffineForOp>(loopInst);
- auto childrenMatches = oneMatch.getMatchedChildren();
-
- // 1. DFS postorder recursion, if any of my children fails, I fail too.
- for (auto m : childrenMatches) {
- if (failed(vectorizeLoopsAndLoadsRecursively(m, state))) {
- return failure();
- }
- }
+vectorizeLoopsAndLoads(std::vector<SmallVector<AffineForOp, 2>> &loops,
+ VectorizationState *state) {
+ // Vectorize loops in inner-to-outer order. If any children fails, the parent
+ // will fail too.
+ for (auto &loopsInLevel : llvm::reverse(loops)) {
+ for (AffineForOp loop : loopsInLevel) {
+ // 1. This loop may have been omitted from vectorization for various
+ // reasons (e.g. due to the performance model or pattern depth > vector
+ // size).
+ auto it = state->strategy->loopToVectorDim.find(loop.getOperation());
+ if (it == state->strategy->loopToVectorDim.end())
+ continue;
- // 2. This loop may have been omitted from vectorization for various reasons
- // (e.g. due to the performance model or pattern depth > vector size).
- auto it = state->strategy->loopToVectorDim.find(loopInst);
- if (it == state->strategy->loopToVectorDim.end()) {
- return success();
+ // 2. Actual inner-to-outer transformation.
+ auto vectorDim = it->second;
+ assert(vectorDim < state->strategy->vectorSizes.size() &&
+ "vector dim overflow");
+ // a. get actual vector size
+ auto vectorSize = state->strategy->vectorSizes[vectorDim];
+ // b. loop transformation for early vectorization is still subject to
+ // exploratory tradeoffs (see top of the file). Apply coarsening,
+ // i.e.:
+ // | ub -> ub
+ // | step -> step * vectorSize
+ LLVM_DEBUG(dbgs() << "\n[early-vect] vectorizeForOp by " << vectorSize
+ << " : \n"
+ << loop);
+ if (failed(
+ vectorizeAffineForOp(loop, loop.getStep() * vectorSize, state)))
+ return failure();
+ } // end for.
}
- // 3. Actual post-order transformation.
- auto vectorDim = it->second;
- assert(vectorDim < state->strategy->vectorSizes.size() &&
- "vector dim overflow");
- // a. get actual vector size
- auto vectorSize = state->strategy->vectorSizes[vectorDim];
- // b. loop transformation for early vectorization is still subject to
- // exploratory tradeoffs (see top of the file). Apply coarsening, i.e.:
- // | ub -> ub
- // | step -> step * vectorSize
- LLVM_DEBUG(dbgs() << "\n[early-vect] vectorizeForOp by " << vectorSize
- << " : ");
- LLVM_DEBUG(loopInst->print(dbgs()));
- return vectorizeAffineForOp(loop, loop.getStep() * vectorSize, state);
+ return success();
}
/// Tries to transform a scalar constant into a vector splat of that constant.
@@ -1145,16 +1134,46 @@ static LogicalResult vectorizeNonTerminals(VectorizationState *state) {
return success();
}
-/// Vectorization is a recursive procedure where anything below can fail.
-/// The root match thus needs to maintain a clone for handling failure.
-/// Each root may succeed independently but will otherwise clean after itself if
-/// anything below it fails.
-static LogicalResult vectorizeRootMatch(NestedMatch m,
- VectorizationStrategy *strategy) {
- auto loop = cast<AffineForOp>(m.getMatchedOperation());
- OperationFolder folder(loop.getContext());
+/// Recursive implementation to convert all the nested loops in 'match' to a 2D
+/// vector container that preserves the relative nesting level of each loop with
+/// respect to the others in 'match'. 'currentLevel' is the nesting level that
+/// will be assigned to the loop in the current 'match'.
+static void
+getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel,
+ std::vector<SmallVector<AffineForOp, 2>> &loops) {
+ // Add a new empty level to the output if it doesn't exist already.
+ assert(currentLevel <= loops.size() && "Unexpected currentLevel");
+ if (currentLevel == loops.size())
+ loops.push_back(SmallVector<AffineForOp, 2>());
+
+ // Add current match and recursively visit its children.
+ loops[currentLevel].push_back(cast<AffineForOp>(match.getMatchedOperation()));
+ for (auto childMatch : match.getMatchedChildren()) {
+ getMatchedAffineLoopsRec(childMatch, currentLevel + 1, loops);
+ }
+}
+
+/// Converts all the nested loops in 'match' to a 2D vector container that
+/// preserves the relative nesting level of each loop with respect to the others
+/// in 'match'. This means that every loop in 'loops[i]' will have a parent loop
+/// in 'loops[i-1]'. A loop in 'loops[i]' may or may not have a child loop in
+/// 'loops[i+1]'.
+static void
+getMatchedAffineLoops(NestedMatch match,
+ std::vector<SmallVector<AffineForOp, 2>> &loops) {
+ getMatchedAffineLoopsRec(match, /*currLoopDepth=*/0, loops);
+}
+
+/// Internal implementation to vectorize affine loops from a single loop nest
+/// using an n-D vectorization strategy.
+static LogicalResult
+vectorizeLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
+ const VectorizationStrategy &strategy) {
+ assert(loops[0].size() == 1 && "Expected single root loop");
+ AffineForOp rootLoop = loops[0][0];
+ OperationFolder folder(rootLoop.getContext());
VectorizationState state;
- state.strategy = strategy;
+ state.strategy = &strategy;
state.folder = &folder;
// Since patterns are recursive, they can very well intersect.
@@ -1164,7 +1183,7 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
// vectorizable. If a pattern is not vectorizable anymore, we just skip it.
// TODO: implement a non-greedy profitability analysis that keeps only
// non-intersecting patterns.
- if (!isVectorizableLoopBody(loop, vectorTransferPattern())) {
+ if (!isVectorizableLoopBody(rootLoop, vectorTransferPattern())) {
LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ loop is not vectorizable");
return failure();
}
@@ -1172,7 +1191,7 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
/// Sets up error handling for this root loop. This is how the root match
/// maintains a clone for handling failure and restores the proper state via
/// RAII.
- auto *loopInst = loop.getOperation();
+ auto *loopInst = rootLoop.getOperation();
OpBuilder builder(loopInst);
auto clonedLoop = cast<AffineForOp>(builder.clone(*loopInst));
struct Guard {
@@ -1187,17 +1206,17 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
}
AffineForOp loop;
AffineForOp clonedLoop;
- } guard{loop, clonedLoop};
+ } guard{rootLoop, clonedLoop};
//////////////////////////////////////////////////////////////////////////////
// Start vectorizing.
// From now on, any error triggers the scope guard above.
//////////////////////////////////////////////////////////////////////////////
- // 1. Vectorize all the loops matched by the pattern, recursively.
+ // 1. Vectorize all the loop candidates, in inner-to-outer order.
// This also vectorizes the roots (AffineLoadOp) as well as registers the
// terminals (AffineStoreOp) for post-processing vectorization (we need to
// wait for all use-def chains into them to be vectorized first).
- if (failed(vectorizeLoopsAndLoadsRecursively(m, &state))) {
+ if (failed(vectorizeLoopsAndLoads(loops, &state))) {
LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ failed root vectorizeLoop");
return guard.failure();
}
@@ -1229,38 +1248,25 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
return guard.success();
}
-/// Applies vectorization to the current Function by searching over a bunch of
-/// predetermined patterns.
-void Vectorize::runOnFunction() {
- FuncOp f = getFunction();
- if (!fastestVaryingPattern.empty() &&
- fastestVaryingPattern.size() != vectorSizes.size()) {
-    f.emitRemark("Fastest varying pattern specified with different size than "
- "the vector size.");
- return signalPassFailure();
- }
-
- DenseSet<Operation *> parallelLoops;
- f.walk([¶llelLoops](AffineForOp loop) {
- if (isLoopParallel(loop))
- parallelLoops.insert(loop);
- });
-
- vectorizeAffineLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern);
+/// Vectorization is a recursive procedure where anything below can fail. The
+/// root match thus needs to maintain a clone for handling failure. Each root
+/// may succeed independently but will otherwise clean after itself if anything
+/// below it fails.
+static LogicalResult vectorizeRootMatch(NestedMatch m,
+ const VectorizationStrategy &strategy) {
+ std::vector<SmallVector<AffineForOp, 2>> loopsToVectorize;
+ getMatchedAffineLoops(m, loopsToVectorize);
+ return vectorizeLoopNest(loopsToVectorize, strategy);
}
-namespace mlir {
-
-/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
-/// 'vectorSizes'. By default, each vectorization factor is applied
-/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
-/// be optionally used to provide a different loop vectorization order.
-void vectorizeAffineLoops(Operation *parentOp, DenseSet<Operation *> &loops,
- ArrayRef<int64_t> vectorSizes,
- ArrayRef<int64_t> fastestVaryingPattern) {
- // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
- NestedPatternContext mlContext;
-
+/// Internal implementation to vectorize affine loops in 'loops' using the n-D
+/// vectorization factors in 'vectorSizes'. By default, each vectorization
+/// factor is applied inner-to-outer to the loops of each loop nest.
+/// 'fastestVaryingPattern' can be optionally used to provide a different loop
+/// vectorization order.
+static void vectorizeLoops(Operation *parentOp, DenseSet<Operation *> &loops,
+ ArrayRef<int64_t> vectorSizes,
+ ArrayRef<int64_t> fastestVaryingPattern) {
for (auto &pat :
makePatterns(loops, vectorSizes.size(), fastestVaryingPattern)) {
LLVM_DEBUG(dbgs() << "\n******************************************");
@@ -1286,7 +1292,7 @@ void vectorizeAffineLoops(Operation *parentOp, DenseSet<Operation *> &loops,
&strategy);
// TODO: if pattern does not apply, report it; alter the
// cost/benefit.
- vectorizeRootMatch(m, &strategy);
+ vectorizeRootMatch(m, strategy);
// TODO: some diagnostics if failure to vectorize occurs.
}
}
@@ -1301,4 +1307,127 @@ std::unique_ptr<OperationPass<FuncOp>> createSuperVectorizePass() {
return std::make_unique<Vectorize>();
}
+/// Applies vectorization to the current function by searching over a bunch of
+/// predetermined patterns.
+void Vectorize::runOnFunction() {
+ FuncOp f = getFunction();
+ if (!fastestVaryingPattern.empty() &&
+ fastestVaryingPattern.size() != vectorSizes.size()) {
+    f.emitRemark("Fastest varying pattern specified with different size than "
+ "the vector size.");
+ return signalPassFailure();
+ }
+
+ DenseSet<Operation *> parallelLoops;
+ f.walk([¶llelLoops](AffineForOp loop) {
+ if (isLoopParallel(loop))
+ parallelLoops.insert(loop);
+ });
+
+ // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
+ NestedPatternContext mlContext;
+ vectorizeLoops(f, parallelLoops, vectorSizes, fastestVaryingPattern);
+}
+
+/// Verify that affine loops in 'loops' meet the nesting criteria expected by
+/// SuperVectorizer:
+/// * There must be at least one loop.
+/// * There must be a single root loop (nesting level 0).
+/// * Each loop at a given nesting level must be nested in a loop from a
+/// previous nesting level.
+static void
+verifyLoopNesting(const std::vector<SmallVector<AffineForOp, 2>> &loops) {
+ assert(!loops.empty() && "Expected at least one loop");
+ assert(!loops[0].size() && "Expected only one root loop");
+
+ // Traverse loops outer-to-inner to check some invariants.
+ for (int i = 1, end = loops.size(); i < end; ++i) {
+ for (AffineForOp loop : loops[i]) {
+ // Check that each loop at this level is nested in one of the loops from
+ // the previous level.
+ bool parentFound = false;
+ for (AffineForOp maybeParent : loops[i - 1]) {
+ if (maybeParent.getOperation()->isProperAncestor(loop)) {
+ parentFound = true;
+ break;
+ }
+ }
+ assert(parentFound && "Child loop not nested in any parent loop");
+
+ // Check that each loop at this level is not nested in another loop from
+ // this level.
+ for (AffineForOp sibling : loops[i])
+ assert(!sibling.getOperation()->isProperAncestor(loop) &&
+ "Loops at the same level are nested");
+ }
+ }
+}
+
+namespace mlir {
+
+/// External utility to vectorize affine loops in 'loops' using the n-D
+/// vectorization factors in 'vectorSizes'. By default, each vectorization
+/// factor is applied inner-to-outer to the loops of each loop nest.
+/// 'fastestVaryingPattern' can be optionally used to provide a different loop
+/// vectorization order.
+void vectorizeAffineLoops(Operation *parentOp, DenseSet<Operation *> &loops,
+ ArrayRef<int64_t> vectorSizes,
+ ArrayRef<int64_t> fastestVaryingPattern) {
+ // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
+ NestedPatternContext mlContext;
+ vectorizeLoops(parentOp, loops, vectorSizes, fastestVaryingPattern);
+}
+
+/// External utility to vectorize affine loops from a single loop nest using an
+/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
+/// Loops are provided in a 2D vector container. The first dimension represents
+/// the nesting level relative to the loops to be vectorized. The second
+/// dimension contains the loops. This means that:
+/// a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
+/// b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
+///
+/// For example, for the following loop nest:
+///
+/// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
+/// %out0: memref<64x128x512xf32>,
+/// %out1: memref<64x128x128xf32>) {
+/// affine.for %i0 = 0 to 64 {
+/// affine.for %i1 = 0 to 128 {
+/// affine.for %i2 = 0 to 512 {
+/// %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
+/// }
+/// affine.for %i3 = 0 to 128 {
+/// %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
+/// affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
+/// }
+/// }
+/// }
+/// return
+/// }
+///
+/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
+/// innermost loops;
+/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
+/// loops;
+/// loops = {{%i2}}, to vectorize only the first innermost loop;
+/// loops = {{%i3}}, to vectorize only the second innermost loop;
+/// loops = {{%i1}}, to vectorize only the middle loop.
+LogicalResult
+vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
+ const VectorizationStrategy &strategy) {
+ // Thread-safe RAII local context, BumpPtrAllocator freed on exit.
+ NestedPatternContext mlContext;
+ verifyLoopNesting(loops);
+ return vectorizeLoopNest(loops, strategy);
+}
+
+std::unique_ptr<OperationPass<FuncOp>>
+createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize) {
+ return std::make_unique<Vectorize>(virtualVectorSize);
+}
+std::unique_ptr<OperationPass<FuncOp>> createSuperVectorizePass() {
+ return std::make_unique<Vectorize>();
+}
+
} // namespace mlir
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
index 66429907205e..ca496b75432c 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir
@@ -1,7 +1,8 @@
// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=128 test-fastest-varying=0" | FileCheck %s
// Permutation maps used in vectorization.
-// CHECK: #[[$map_proj_d0d1_0:map[0-9]+]] = affine_map<(d0, d1) -> (0)>
+// CHECK-DAG: #[[$map_proj_d0d1_0:map[0-9]+]] = affine_map<(d0, d1) -> (0)>
+// CHECK-DAG: #[[$map_id1:map[0-9]+]] = affine_map<(d0) -> (d0)>
#map0 = affine_map<(d0) -> (d0)>
#mapadd1 = affine_map<(d0) -> (d0 + 1)>
@@ -26,8 +27,8 @@ func @vec1d_1(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
%P = dim %B, %c2 : memref<?x?x?xf32>
// CHECK: for {{.*}} step 128
-// CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]])
-// CHECK-NEXT: %{{.*}} = affine.apply #map0(%[[C0]])
+// CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%[[C0]])
+// CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%[[C0]])
// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32
// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
@@ -331,8 +332,8 @@ func @vec_rejected_8(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
-// CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
-// CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
+// CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
+// CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
// CHECK: %{{.*}} = constant 0.0{{.*}}: f32
// CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %{{.*}} in DFS post-order prevents vectorizing %{{.*}}
@@ -360,8 +361,8 @@ func @vec_rejected_9(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
-// CHECK: %{{.*}} = affine.apply #map0(%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply #map0(%{{.*}})
+// CHECK: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
+// CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%{{.*}})
// CHECK-NEXT: %{{.*}} = constant 0.0{{.*}}: f32
// CHECK-NEXT: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[$map_proj_d0d1_0]]} : memref<?x?xf32>, vector<128xf32>
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %{{.*}}
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
index f2cd769a7cc1..0cf945ee8199 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir
@@ -124,7 +124,7 @@ func @vectorize_matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: me
}
// VECT: affine.for %[[I2:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[M]]) step 4 {
// VECT-NEXT: affine.for %[[I3:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[N]]) step 8 {
- // VECT-NEXT: affine.for %[[I4:.*]] = #map5(%[[C0]]) to #[[$map_id1]](%[[K]]) {
+ // VECT-NEXT: affine.for %[[I4:.*]] = #[[$map_id1]](%[[C0]]) to #[[$map_id1]](%[[K]]) {
// VECT: %[[A:.*]] = vector.transfer_read %{{.*}}[%[[I4]], %[[I3]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_zerod1]]} : memref<?x?xf32>, vector<4x8xf32>
// VECT: %[[B:.*]] = vector.transfer_read %{{.*}}[%[[I2]], %[[I4]]], %{{.*}} {permutation_map = #[[$map_proj_d0d1_d0zero]]} : memref<?x?xf32>, vector<4x8xf32>
// VECT-NEXT: %[[C:.*]] = mulf %[[B]], %[[A]] : vector<4x8xf32>
More information about the Mlir-commits
mailing list