[Mlir-commits] [mlir] 0f3544d - [mlir][scf] Loop peeling: Use scf.for for partial iteration
Matthias Springer
llvmlistbot at llvm.org
Fri Sep 10 03:13:53 PDT 2021
Author: Matthias Springer
Date: 2021-09-10T19:07:09+09:00
New Revision: 0f3544d1856dee83d6e18187098531d79e7f200e
URL: https://github.com/llvm/llvm-project/commit/0f3544d1856dee83d6e18187098531d79e7f200e
DIFF: https://github.com/llvm/llvm-project/commit/0f3544d1856dee83d6e18187098531d79e7f200e.diff
LOG: [mlir][scf] Loop peeling: Use scf.for for partial iteration
Generate an scf.for instead of an scf.if for the partial iteration. This is for consistency: the peeling of linalg.tiled_loop also uses another loop for the partial iteration.
Note: Canonicalization patterns may rewrite partial iterations to scf.if afterwards.
Differential Revision: https://reviews.llvm.org/D109568
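To make the new shape concrete, here is a minimal sketch of the rewrite (hypothetical bounds %lb/%ub/%step; the split-bound map is the same as MAP0 in the tests below):

  // Before peeling:
  scf.for %iv = %lb to %ub step %step {
    (loop body)
  }

  // After peeling: the first loop runs only full iterations; the second
  // runs at most one partial iteration.
  %newUb = affine.apply affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>()[%lb, %ub, %step]
  scf.for %iv = %lb to %newUb step %step {
    (loop body)
  }
  scf.for %iv2 = %newUb to %ub step %step {
    (loop body)
  }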
Added:
Modified:
mlir/include/mlir/Dialect/SCF/Transforms.h
mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
mlir/test/Dialect/SCF/for-loop-peeling.mlir
mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
index 4cbf43e42111b..b8aebcb9a1900 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -68,8 +68,8 @@ void naivelyFuseParallelOps(Region &region);
/// Rewrite a for loop with bounds/step that potentially do not divide evenly
/// into a for loop where the step divides the iteration space evenly, followed
-/// by an scf.if for the last (partial) iteration (if any; returned via `ifOp`).
-/// This transformation is called "loop peeling".
+/// by another scf.for for the last (partial) iteration (if any; returned via
+/// `partialIteration`). This transformation is called "loop peeling".
///
/// This transformation is beneficial for a wide range of transformations such
/// as vectorization or loop tiling: It enables additional canonicalizations
@@ -88,16 +88,16 @@ void naivelyFuseParallelOps(Region &region);
/// scf.for %iv = %c0 to %newUb step %c4 {
/// (loop body)
/// }
-/// scf.if %newUb < %ub {
+/// scf.for %iv2 = %newUb to %ub step %c4 {
/// (loop body)
/// }
/// ```
///
/// After loop peeling, this function tries to simplify/canonicalize affine.min
-/// and affine.max ops in the body of the loop and the scf.if, taking advantage
-/// of the fact that the peeled loop has only "full" iterations and the scf.if
-/// is always a partial iteration (if any). This canonicalization is expected to
-/// enable further canonicalization opportunities through other patterns.
+/// and affine.max ops in the body of the peeled loop and in the body of the
+/// partial iteration loop, taking advantage of the fact that the peeled loop
+/// has only "full" iterations. This canonicalization is expected to enable
+/// further canonicalization opportunities through other patterns.
///
/// The return value indicates whether the loop was rewritten or not. Loops are
/// not rewritten if:
@@ -106,10 +106,10 @@ void naivelyFuseParallelOps(Region &region);
/// iteration space evenly.
///
/// Note: This function rewrites the given scf.for loop in-place and creates a
-/// new scf.if operation for the last iteration. It replaces all uses of the
-/// unpeeled loop with the results of the newly generated scf.if.
+/// new scf.for operation for the last iteration. It replaces all uses of the
+/// unpeeled loop with the results of the newly generated scf.for.
LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp,
- scf::IfOp &ifOp);
+ scf::ForOp &partialIteration);
/// Try to simplify a min/max operation `op` after loop peeling. This function
/// can simplify min/max operations such as (ub is the previous upper bound of
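For callers, usage of the new signature mirrors the ForLoopPeelingPattern further down in this diff; a minimal sketch (assuming a rewriter and a matched forOp are in scope):

  scf::ForOp partialIteration;
  if (failed(peelAndCanonicalizeForLoop(rewriter, forOp, partialIteration)))
    return failure();
  // On success, forOp covers only full iterations, and partialIteration
  // (an scf.for, not an scf.if) has replaced all uses of the original
  // loop's results.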
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index fa6b5bcadfc4b..f29e719f684b7 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -106,8 +106,8 @@ static void specializeForLoopForUnrolling(ForOp op) {
/// The newly generated scf.if operation is returned via `ifOp`. The boundary
/// at which the loop is split (new upper bound) is returned via `splitBound`.
/// The return value indicates whether the loop was rewritten or not.
-static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
- Value &splitBound) {
+static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp,
+ ForOp &partialIteration, Value &splitBound) {
RewriterBase::InsertionGuard guard(b);
auto lbInt = getConstantIntValue(forOp.lowerBound());
auto ubInt = getConstantIntValue(forOp.upperBound());
@@ -130,34 +130,16 @@ static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
loc, modMap,
ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});
+ // Create ForOp for partial iteration.
+ b.setInsertionPointAfter(forOp);
+ partialIteration = cast<ForOp>(b.clone(*forOp.getOperation()));
+ partialIteration.lowerBoundMutable().assign(splitBound);
+ forOp.replaceAllUsesWith(partialIteration->getResults());
+ partialIteration.initArgsMutable().assign(forOp->getResults());
+
// Set new upper loop bound.
- Value previousUb = forOp.upperBound();
b.updateRootInPlace(forOp,
[&]() { forOp.upperBoundMutable().assign(splitBound); });
- b.setInsertionPointAfter(forOp);
-
- // Do we need one more iteration?
- Value hasMoreIter =
- b.create<CmpIOp>(loc, CmpIPredicate::slt, splitBound, previousUb);
-
- // Create IfOp for last iteration.
- auto resultTypes = forOp.getResultTypes();
- ifOp = b.create<scf::IfOp>(loc, resultTypes, hasMoreIter,
- /*withElseRegion=*/!resultTypes.empty());
- forOp.replaceAllUsesWith(ifOp->getResults());
-
- // Build then case.
- BlockAndValueMapping bvm;
- bvm.map(forOp.region().getArgument(0), splitBound);
- for (auto it : llvm::zip(forOp.getRegionIterArgs(), forOp->getResults())) {
- bvm.map(std::get<0>(it), std::get<1>(it));
- }
- b.cloneRegionBefore(forOp.region(), ifOp.thenRegion(),
- ifOp.thenRegion().begin(), bvm);
- // Build else case.
- if (!resultTypes.empty())
- ifOp.getElseBodyBuilder(b.getListener())
- .create<scf::YieldOp>(loc, forOp->getResults());
return success();
}
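For concreteness, a worked example of the split bound that modMap computes above (values invented): for lb = 0, ub = 10, step = 4,

  splitBound = ub - ((ub - lb) mod step) = 10 - (10 mod 4) = 8

so the rewritten forOp runs the full iterations at %iv = 0 and 4, and the cloned partialIteration loop runs the single partial iteration at %iv2 = 8, seeded with forOp's results as its iter_args.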
@@ -370,37 +352,43 @@ LogicalResult mlir::scf::rewritePeeledMinMaxOp(RewriterBase &rewriter,
}
template <typename OpTy, bool IsMin>
-static void
-rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp, scf::IfOp ifOp,
- Value iv, Value splitBound, Value ub, Value step) {
+static void rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp,
+ ForOp partialIteration,
+ Value previousUb) {
+ Value mainIv = forOp.getInductionVar();
+ Value partialIv = partialIteration.getInductionVar();
+ assert(forOp.step() == partialIteration.step() &&
+ "expected same step in main and partial loop");
+ Value step = forOp.step();
+
forOp.walk([&](OpTy affineOp) {
AffineMap map = affineOp.getAffineMap();
(void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
- affineOp.operands(), IsMin, iv, ub, step,
+ affineOp.operands(), IsMin, mainIv,
+ previousUb, step,
/*insideLoop=*/true);
});
- ifOp.walk([&](OpTy affineOp) {
+ partialIteration.walk([&](OpTy affineOp) {
AffineMap map = affineOp.getAffineMap();
(void)scf::rewritePeeledMinMaxOp(rewriter, affineOp, map,
- affineOp.operands(), IsMin, splitBound, ub,
- step, /*insideLoop=*/false);
+ affineOp.operands(), IsMin, partialIv,
+ previousUb, step, /*insideLoop=*/false);
});
}
LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
ForOp forOp,
- scf::IfOp &ifOp) {
- Value ub = forOp.upperBound();
+ ForOp &partialIteration) {
+ Value previousUb = forOp.upperBound();
Value splitBound;
- if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
+ if (failed(peelForLoop(rewriter, forOp, partialIteration, splitBound)))
return failure();
// Rewrite affine.min and affine.max ops.
- Value iv = forOp.getInductionVar(), step = forOp.step();
rewriteAffineOpAfterPeeling<AffineMinOp, /*IsMin=*/true>(
- rewriter, forOp, ifOp, iv, splitBound, ub, step);
+ rewriter, forOp, partialIteration, previousUb);
rewriteAffineOpAfterPeeling<AffineMaxOp, /*IsMin=*/false>(
- rewriter, forOp, ifOp, iv, splitBound, ub, step);
+ rewriter, forOp, partialIteration, previousUb);
return success();
}
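As an illustration of the affine rewrites above (using the tests' input map affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>, i.e. min(step, ub - iv)):

  // In the main loop, the min is statically known to equal the step,
  // because that loop now has only full iterations:
  %m = affine.min affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>(%ub, %iv)[%step]
  // ... is replaced by %step.

  // In the partial-iteration loop, the min is the remainder:
  %m2 = affine.min affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>(%ub, %iv2)[%step]
  // ... is replaced by affine.apply affine_map<(d0)[s0] -> (-d0 + s0)>(%iv2)[%ub],
  // which is MAP1 in the updated CHECK lines below.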
@@ -491,23 +479,24 @@ struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
if (forOp->hasAttr(kPeeledLoopLabel))
return failure();
if (skipPartial) {
- // No peeling of loops inside the partial iteration (scf.if) of another
- // peeled loop.
+ // No peeling of loops inside the partial iteration of another peeled
+ // loop.
Operation *op = forOp.getOperation();
- while ((op = op->getParentOfType<scf::IfOp>())) {
+ while ((op = op->getParentOfType<scf::ForOp>())) {
if (op->hasAttr(kPartialIterationLabel))
return failure();
}
}
// Apply loop peeling.
- scf::IfOp ifOp;
- if (failed(peelAndCanonicalizeForLoop(rewriter, forOp, ifOp)))
+ scf::ForOp partialIteration;
+ if (failed(peelAndCanonicalizeForLoop(rewriter, forOp, partialIteration)))
return failure();
// Apply label, so that the same loop is not rewritten a second time.
+ partialIteration->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
rewriter.updateRootInPlace(forOp, [&]() {
forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
});
- ifOp->setAttr(kPartialIterationLabel, rewriter.getUnitAttr());
+ partialIteration->setAttr(kPartialIterationLabel, rewriter.getUnitAttr());
return success();
}
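After the pattern applies, both loops carry the peeled-loop label and the clone additionally carries the partial-iteration label, so neither is rewritten a second time; a sketch of the resulting shape (bodies elided; the unit-attribute spellings shown here are assumed values of the kPeeledLoopLabel/kPartialIterationLabel constants, for illustration only):

  scf.for %iv = %lb to %newUb step %step {
    ...
  } {__peeled_loop__}
  scf.for %iv2 = %newUb to %ub step %step {
    ...
  } {__peeled_loop__, __partial_iteration__}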
diff --git a/mlir/test/Dialect/SCF/for-loop-peeling.mlir b/mlir/test/Dialect/SCF/for-loop-peeling.mlir
index beb72e468fbef..7cdb95808158d 100644
--- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir
@@ -2,7 +2,7 @@
// RUN: mlir-opt %s -for-loop-peeling=skip-partial=false -canonicalize -split-input-file | FileCheck %s -check-prefix=CHECK-NO-SKIP
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @fully_dynamic_bounds(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
// CHECK: %[[C0_I32:.*]] = constant 0 : i32
@@ -13,14 +13,12 @@
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
-// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
-// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
+// CHECK-SAME: step %[[STEP]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
-// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
+// CHECK: %[[ADD2:.*]] = addi %[[ACC2]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
-// CHECK: } else {
-// CHECK: scf.yield %[[LOOP]]
// CHECK: }
// CHECK: return %[[RESULT]]
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
@@ -70,7 +68,7 @@ func @fully_static_bounds() -> i32 {
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @dynamic_upper_bound(
// CHECK-SAME: %[[UB:.*]]: index
// CHECK-DAG: %[[C0_I32:.*]] = constant 0 : i32
@@ -83,14 +81,12 @@ func @fully_static_bounds() -> i32 {
// CHECK: %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
-// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
-// CHECK: %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
+// CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
+// CHECK-SAME: step %[[C4]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
-// CHECK: %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
+// CHECK: %[[ADD2:.*]] = addi %[[ACC2]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
-// CHECK: } else {
-// CHECK: scf.yield %[[LOOP]]
// CHECK: }
// CHECK: return %[[RESULT]]
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
@@ -111,7 +107,7 @@ func @dynamic_upper_bound(%ub : index) -> i32 {
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @no_loop_results(
// CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
// CHECK-DAG: %[[C4_I32:.*]] = constant 4 : i32
@@ -123,9 +119,8 @@ func @dynamic_upper_bound(%ub : index) -> i32 {
// CHECK: %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32
// CHECK: memref.store %[[ADD]], %[[MEMREF]]
// CHECK: }
-// CHECK: %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
-// CHECK: scf.if %[[HAS_MORE]] {
-// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
+// CHECK: scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] step %[[C4]] {
+// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
// CHECK: %[[CAST2:.*]] = index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]]
@@ -157,11 +152,10 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0] -> (-s0)>
-// CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
-// CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 + 1)>
-// CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0 - 1)>
-// CHECK-DAG: #[[MAP8:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
-// CHECK-DAG: #[[MAP9:.*]] = affine_map<()[s0, s1, s2] -> (s0 - (s0 - s1) mod s2 - s0)>
+// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
+// CHECK-DAG: #[[MAP6:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 + 1)>
+// CHECK-DAG: #[[MAP7:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
+// CHECK-DAG: #[[MAP8:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
// CHECK: func @test_affine_op_rewrite(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
// CHECK-SAME: %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
@@ -179,18 +173,18 @@ func @no_loop_results(%ub : index, %d : memref<i32>) {
// CHECK: %[[RES5:.*]] = affine.apply #[[MAP4]]()[%[[STEP]]]
// CHECK: memref.store %[[RES5]]
// CHECK: }
-// CHECK: scf.if {{.*}} {
-// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP5]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: scf.for %[[IV2:.*]] = {{.*}} to %[[UB]] step %[[STEP]] {
+// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP5]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_0]]
-// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_1]]
-// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP6]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_2]]
-// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP7]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP7]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_3]]
-// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP8]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
+// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP3]](%[[IV2]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
// CHECK: memref.store %[[RES_IF_4]]
-// CHECK: %[[RES_IF_5:.*]] = affine.apply #[[MAP9]]()[%[[UB]], %[[LB]], %[[STEP]]]
+// CHECK: %[[RES_IF_5:.*]] = affine.apply #[[MAP8]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_5]]
#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
@@ -243,26 +237,26 @@ func @test_affine_op_rewrite(%lb : index, %ub: index,
// CHECK: scf.for {{.*}} {
// CHECK: scf.for {{.*}} {
// CHECK: }
-// CHECK: scf.if {{.*}} {
+// CHECK: scf.for {{.*}} {
// CHECK: }
// CHECK: }
-// CHECK: scf.if {{.*}} {
+// CHECK: scf.for {{.*}} {
// CHECK: scf.for {{.*}} {
// CHECK: }
-// CHECK-NOT: scf.if
+// CHECK-NOT: scf.for
// CHECK: }
// CHECK-NO-SKIP: func @nested_loops
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
-// CHECK-NO-SKIP: scf.if {{.*}} {
+// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: }
-// CHECK-NO-SKIP: scf.if {{.*}} {
+// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
-// CHECK-NO-SKIP: scf.if {{.*}} {
+// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: }
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
index 26c424f3f7916..2c03401ce4af0 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
@@ -18,7 +18,7 @@
}
// CHECK-DAG: #[[$map0:.*]] = affine_map<()[s0, s1] -> (s0 + ((-s0 + s1) floordiv 16) * 16)>
-// CHECK-DAG: #[[$map1:.*]] = affine_map<()[s0, s1] -> ((s0 - s1) mod 16)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK-LABEL: func @mul_s
// CHECK-DAG: %[[c0:.*]] = constant 0 : index
// CHECK-DAG: %[[c1:.*]] = constant 1 : index
@@ -39,13 +39,12 @@
// CHECK: %[[m:.*]] = mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK: vector.scatter %{{.*}}[%[[c0]]] [%[[zi]]], %[[mask]], %[[m]] : memref<1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32>
// CHECK: }
-// CHECK: %[[has_more:.*]] = cmpi slt, %[[boundary]], %[[s]] : index
-// CHECK: scf.if %[[has_more]] {
-// CHECK: %[[sub:.*]] = affine.apply #[[$map1]]()[%[[s]], %[[q]]]
+// CHECK: scf.for %[[i2:.*]] = %[[boundary]] to %[[s]] step %[[c16]] {
+// CHECK: %[[sub:.*]] = affine.apply #[[$map1]](%[[i2]])[%[[s]]]
// CHECK: %[[mask2:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
-// CHECK: %[[li2:.*]] = vector.maskedload %{{.*}}[%[[boundary]]], %[[mask2]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
+// CHECK: %[[li2:.*]] = vector.maskedload %{{.*}}[%[[i2]]], %[[mask2]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
// CHECK: %[[zi2:.*]] = zexti %[[li2]] : vector<16xi32> to vector<16xi64>
-// CHECK: %[[la2:.*]] = vector.maskedload %{{.*}}[%[[boundary]]], %[[mask2]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+// CHECK: %[[la2:.*]] = vector.maskedload %{{.*}}[%[[i2]]], %[[mask2]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK: %[[lb2:.*]] = vector.gather %{{.*}}[%[[c0]]] [%[[zi2]]], %[[mask2]], %{{.*}} : memref<1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK: %[[m2:.*]] = mulf %[[la2]], %[[lb2]] : vector<16xf32>
// CHECK: vector.scatter %{{.*}}[%[[c0]]] [%[[zi2]]], %[[mask2]], %[[m2]] : memref<1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32>