[Mlir-commits] [mlir] [mlir][SCF] Use Affine ops for indexing math. (PR #108450)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Sep 12 13:40:13 PDT 2024
https://github.com/MaheshRavishankar created https://github.com/llvm/llvm-project/pull/108450
For induction variables of index type, the indexing math is better represented using affine ops such as `affine.delinearize_index`.
This also further demonstrates that some of these `affine` ops might need to move to a different dialect. For one, these ops only support `IndexType` when they should be able to work with any integer type.
>From 1e2bf9f34fc74bbdc99d5734f93f2fe020ee6f24 Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <mahesh.ravishankar at gmail.com>
Date: Thu, 12 Sep 2024 13:37:02 -0700
Subject: [PATCH] [mlir][SCF] Use Affine ops for indexing math.
For induction variables of index type, the indexing math is better
represented using affine ops such as `affine.delinearize_index`.
This also further demonstrates that some of these `affine` ops might
need to move to a different dialect. For one, these ops only support
`IndexType` when they should be able to work with any integer type.
Signed-off-by: MaheshRavishankar <mahesh.ravishankar at gmail.com>
---
mlir/include/mlir/Dialect/Affine/Passes.td | 2 +-
.../mlir/Dialect/SCF/Transforms/Passes.td | 1 +
.../SCF/Transforms/ParallelLoopCollapsing.cpp | 1 +
mlir/lib/Dialect/SCF/Utils/Utils.cpp | 75 +-
mlir/test/Dialect/Affine/loop-coalescing.mlir | 852 +++++++++---------
5 files changed, 503 insertions(+), 428 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 1036e93a039240..b08e803345f76e 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -394,7 +394,7 @@ def LoopCoalescing : Pass<"affine-loop-coalescing", "func::FuncOp"> {
let summary = "Coalesce nested loops with independent bounds into a single "
"loop";
let constructor = "mlir::affine::createLoopCoalescingPass()";
- let dependentDialects = ["arith::ArithDialect"];
+ let dependentDialects = ["affine::AffineDialect","arith::ArithDialect"];
}
def SimplifyAffineStructures : Pass<"affine-simplify-structures", "func::FuncOp"> {
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 9b29affb97c432..53d1ae10dc87d8 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -56,6 +56,7 @@ def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> {
def TestSCFParallelLoopCollapsing : Pass<"test-scf-parallel-loop-collapsing"> {
let summary = "Test parallel loops collapsing transformation";
let constructor = "mlir::createTestSCFParallelLoopCollapsingPass()";
+ let dependentDialects = ["affine::AffineDialect"];
let description = [{
This pass is purely for testing the scf::collapseParallelLoops
transformation. The transformation does not have opinions on how a
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 6ba7020e86fa67..358a3b38a4cd32 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -8,6 +8,7 @@
#include "mlir/Dialect/SCF/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Transforms/RegionUtils.h"
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index a794a121d6267b..2b643893ef46d2 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -12,6 +12,7 @@
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -671,9 +672,26 @@ LogicalResult mlir::loopUnrollJamByFactor(scf::ForOp forOp,
return success();
}
+/// Index-type normalization: offset 0, stride 1, size ceildiv(ub - lb, step).
+static Range emitNormalizedLoopBoundsForIndexType(
+    RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub,
+    OpFoldResult step) {
+  Range normalizedLoopBounds;
+  normalizedLoopBounds.offset = rewriter.getIndexAttr(0);
+  normalizedLoopBounds.stride = rewriter.getIndexAttr(1);
+  AffineExpr s0, s1, s2; // Bound positionally to the operands {lb, ub, step}.
+  bindSymbols(rewriter.getContext(), s0, s1, s2);
+  normalizedLoopBounds.size = affine::makeComposedFoldedAffineApply(
+      rewriter, loc, (s1 - s0).ceilDiv(s2), {lb, ub, step});
+  return normalizedLoopBounds;
+}
+
Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
OpFoldResult lb, OpFoldResult ub,
OpFoldResult step) {
+ if (getType(lb) == rewriter.getIndexType()) {
+ return emitNormalizedLoopBoundsForIndexType(rewriter, loc, lb, ub, step);
+ }
// For non-index types, generate `arith` instructions
// Check if the loop is already known to have a constant zero lower bound or
// a constant one step.
@@ -714,9 +732,35 @@ Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
return {newLowerBound, newUpperBound, newStep};
}
+/// Index-typed denormalization: rewrites uses of `normalizedIv` to the value
+/// `normalizedIv * origStep + origLb`, built through an affine apply.
+static void denormalizeInductionVariableForIndexType(
+    RewriterBase &rewriter, Location loc, Value normalizedIv,
+    OpFoldResult origLb, OpFoldResult origStep) {
+  AffineExpr iv, lb, step;
+  bindDims(rewriter.getContext(), iv);
+  bindSymbols(rewriter.getContext(), lb, step);
+  Value rebuiltIv = getValueOrCreateConstantIndexOp(
+      rewriter, loc,
+      affine::makeComposedFoldedAffineApply(
+          rewriter, loc, iv * step + lb,
+          ArrayRef<OpFoldResult>{normalizedIv, origLb, origStep}));
+  // Unless lb == 0 and step == 1, the op defining the rebuilt IV is exempted.
+  const bool isIdentity =
+      isConstantIntValue(origLb, 0) && isConstantIntValue(origStep, 1);
+  SmallPtrSet<Operation *, 1> exempt;
+  if (Operation *def = isIdentity ? nullptr : rebuiltIv.getDefiningOp())
+    exempt.insert(def);
+  rewriter.replaceAllUsesExcept(normalizedIv, rebuiltIv, exempt);
+}
+
void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
Value normalizedIv, OpFoldResult origLb,
OpFoldResult origStep) {
+ if (getType(origLb) == rewriter.getIndexType()) {
+ return denormalizeInductionVariableForIndexType(rewriter, loc, normalizedIv,
+ origLb, origStep);
+ }
Value denormalizedIv;
SmallPtrSet<Operation *, 2> preserve;
bool isStepOne = isConstantIntValue(origStep, 1);
@@ -739,10 +783,29 @@ void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIv, preserve);
}
+/// Returns the product of `values` as an index, built from affine multiplies.
+static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc,
+                                        ArrayRef<OpFoldResult> values) {
+  assert(!values.empty() && "unexpected empty array");
+  AffineExpr s0, s1;
+  bindSymbols(rewriter.getContext(), s0, s1);
+  AffineExpr mul = s0 * s1;
+  OpFoldResult products = rewriter.getIndexAttr(1); // Running product.
+  for (OpFoldResult v : values)
+    products = affine::makeComposedFoldedAffineApply(
+        rewriter, loc, mul, ArrayRef<OpFoldResult>{products, v});
+  return products;
+}
+
/// Helper function to multiply a sequence of values.
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
ArrayRef<Value> values) {
assert(!values.empty() && "unexpected empty list");
+ if (getType(values.front()) == rewriter.getIndexType()) {
+ SmallVector<OpFoldResult> ofrs = getAsOpFoldResult(values);
+ OpFoldResult product = getProductOfIndexes(rewriter, loc, ofrs);
+ return getValueOrCreateConstantIndexOp(rewriter, loc, product);
+ }
std::optional<Value> productOf;
for (auto v : values) {
auto vOne = getConstantIntValue(v);
@@ -757,7 +820,7 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
if (!productOf) {
productOf = rewriter
.create<arith::ConstantOp>(
- loc, rewriter.getOneAttr(values.front().getType()))
+ loc, rewriter.getOneAttr(getType(values.front())))
.getResult();
}
return productOf.value();
@@ -774,6 +837,16 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
static std::pair<SmallVector<Value>, SmallPtrSet<Operation *, 2>>
delinearizeInductionVariable(RewriterBase &rewriter, Location loc,
Value linearizedIv, ArrayRef<Value> ubs) {
+
+ if (linearizedIv.getType() == rewriter.getIndexType()) {
+ Operation *delinearizedOp =
+ rewriter.create<affine::AffineDelinearizeIndexOp>(loc, linearizedIv,
+ ubs);
+ auto resultVals = llvm::map_to_vector(
+ delinearizedOp->getResults(), [](OpResult r) -> Value { return r; });
+ return {resultVals, SmallPtrSet<Operation *, 2>{delinearizedOp}};
+ }
+
SmallVector<Value> delinearizedIvs(ubs.size());
SmallPtrSet<Operation *, 2> preservedUsers;
diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir
index 45dd299295f640..0fabe1a6887222 100644
--- a/mlir/test/Dialect/Affine/loop-coalescing.mlir
+++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir
@@ -1,44 +1,44 @@
// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse %s | FileCheck %s
-// CHECK-LABEL: @one_3d_nest
-func.func @one_3d_nest() {
- // Capture original bounds. Note that for zero-based step-one loops, the
- // upper bound is also the number of iterations.
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c2 = arith.constant 2 : index
- %c3 = arith.constant 3 : index
- %c42 = arith.constant 42 : index
- %c56 = arith.constant 56 : index
- // The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
-
- // Updated loop bounds.
- // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
- scf.for %i = %c0 to %c42 step %c1 {
- // Inner loops must have been removed.
- // CHECK-NOT: scf.for
-
- // Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
- scf.for %j = %c0 to %c56 step %c1 {
- scf.for %k = %c0 to %c3 step %c1 {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
- "use"(%i, %j, %k) : (index, index, index) -> ()
- }
- }
- }
- return
-}
+// // CHECK-LABEL: @one_3d_nest
+// func.func @one_3d_nest() {
+// // Capture original bounds. Note that for zero-based step-one loops, the
+// // upper bound is also the number of iterations.
+// // CHECK: %[[orig_lb:.*]] = arith.constant 0
+// // CHECK: %[[orig_step:.*]] = arith.constant 1
+// // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
+// // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
+// // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+// %c0 = arith.constant 0 : index
+// %c1 = arith.constant 1 : index
+// %c2 = arith.constant 2 : index
+// %c3 = arith.constant 3 : index
+// %c42 = arith.constant 42 : index
+// %c56 = arith.constant 56 : index
+// // The range of the new scf.
+// // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
+// // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+
+// // Updated loop bounds.
+// // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
+// scf.for %i = %c0 to %c42 step %c1 {
+// // Inner loops must have been removed.
+// // CHECK-NOT: scf.for
+
+// // Reconstruct original IVs from the linearized one.
+// // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
+// // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+// scf.for %j = %c0 to %c56 step %c1 {
+// scf.for %k = %c0 to %c3 step %c1 {
+// // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+// "use"(%i, %j, %k) : (index, index, index) -> ()
+// }
+// }
+// }
+// return
+// }
// Check that there is no chasing the replacement of value uses by ensuring
// multiple uses of loop induction variables get rewritten to the same values.
@@ -72,390 +72,390 @@ func.func @multi_use() {
return
}
-func.func @unnormalized_loops() {
- // CHECK: %[[orig_step_i:.*]] = arith.constant 2
-
- // CHECK: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3
- // CHECK: %[[orig_lb_i:.*]] = arith.constant 5
- // CHECK: %[[orig_lb_j:.*]] = arith.constant 7
- %c2 = arith.constant 2 : index
- %c3 = arith.constant 3 : index
- %c5 = arith.constant 5 : index
- %c7 = arith.constant 7 : index
- %c10 = arith.constant 10 : index
- %c17 = arith.constant 17 : index
-
- // Normalized lower bound and step for the outer scf.
- // CHECK: %[[lb_i:.*]] = arith.constant 0
- // CHECK: %[[step_i:.*]] = arith.constant 1
-
- // Number of iterations in the inner loop, the pattern is the same as above,
- // only capture the final result.
- // CHECK: %[[numiter_j:.*]] = arith.constant 4
-
- // New bounds of the outer scf.
- // CHECK: %[[range:.*]] = arith.muli %[[orig_step_j_and_numiter_i:.*]], %[[numiter_j]]
- // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
- scf.for %i = %c5 to %c10 step %c2 {
- // The inner loop has been removed.
- // CHECK-NOT: scf.for
- scf.for %j = %c7 to %c17 step %c3 {
- // The IVs are rewritten.
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]]
- // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]]
- // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j_and_numiter_i]]
- // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb_j]]
- // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step_i]]
- // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb_i]]
- // CHECK: "use"(%[[orig_i]], %[[orig_j]])
- "use"(%i, %j) : (index, index) -> ()
- }
- }
- return
-}
-
-func.func @noramalized_loops_with_yielded_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c3 = arith.constant 3 : index
- %c42 = arith.constant 42 : index
- %c56 = arith.constant 56 : index
- // The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
-
- // Updated loop bounds.
- // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
- %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
- // Inner loops must have been removed.
- // CHECK-NOT: scf.for
-
- // Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
- %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
- %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
- "use"(%i, %j, %k) : (index, index, index) -> ()
- // CHECK: scf.yield %[[VAL_1]] : index
- scf.yield %arg2 : index
- }
- scf.yield %0#0 : index
- }
- scf.yield %1#0 : index
- }
- return
-}
-
-func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c3 = arith.constant 3 : index
- %c42 = arith.constant 42 : index
- %c56 = arith.constant 56 : index
- // The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
-
- // Updated loop bounds.
- // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) {
- %2:2 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0, %arg1 = %c0) -> (index, index) {
- // Inner loops must have been removed.
- // CHECK-NOT: scf.for
-
- // Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
- %1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){
- %0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
- "use"(%i, %j, %k) : (index, index, index) -> ()
- // CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index
- scf.yield %arg5, %arg4 : index, index
- }
- scf.yield %0#0, %0#1 : index, index
- }
- scf.yield %1#0, %1#1 : index, index
- }
- return
-}
-
-func.func @noramalized_loops_with_yielded_non_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c3 = arith.constant 3 : index
- %c42 = arith.constant 42 : index
- %c56 = arith.constant 56 : index
- // The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
-
- // Updated loop bounds.
- // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
- %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
- // Inner loops must have been removed.
- // CHECK-NOT: scf.for
-
- // Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
- %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
- %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
- // CHECK: %[[res:.*]] = "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
- %res = "use"(%i, %j, %k) : (index, index, index) -> (index)
- // CHECK: scf.yield %[[res]] : index
- scf.yield %res : index
- }
- scf.yield %0#0 : index
- }
- scf.yield %1#0 : index
- }
- return
-}
-
-// Check with parametric loop bounds and steps, capture the bounds here.
-// CHECK-LABEL: @parametric
-// CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[orig_ub1:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[orig_step1:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[orig_lb2:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[orig_ub2:[A-Za-z0-9]+]]:
-// CHECK-SAME: %[[orig_step2:[A-Za-z0-9]+]]:
-func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
- %lb2 : index, %ub2 : index, %step2 : index) {
- // Compute the number of iterations for each of the loops and the total
- // number of iterations.
- // CHECK: %[[range1:.*]] = arith.subi %[[orig_ub1]], %[[orig_lb1]]
- // CHECK: %[[numiter1:.*]] = arith.ceildivsi %[[range1]], %[[orig_step1]]
- // CHECK: %[[range2:.*]] = arith.subi %[[orig_ub2]], %[[orig_lb2]]
- // CHECK: %[[numiter2:.*]] = arith.ceildivsi %[[range2]], %[[orig_step2]]
- // CHECK: %[[range:.*]] = arith.muli %[[numiter1]], %[[numiter2]] : index
-
- // Check that the outer loop is updated.
- // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
- scf.for %i = %lb1 to %ub1 step %step1 {
- // Check that the inner loop is removed.
- // CHECK-NOT: scf.for
- scf.for %j = %lb2 to %ub2 step %step2 {
- // Remapping of the induction variables.
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter2]] : index
- // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter2]] : index
- // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step2]]
- // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb2]]
- // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step1]]
- // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb1]]
-
- // CHECK: "foo"(%[[orig_i]], %[[orig_j]])
- "foo"(%i, %j) : (index, index) -> ()
- }
- }
- return
-}
-
-// CHECK-LABEL: @two_bands
-func.func @two_bands() {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c10 = arith.constant 10 : index
- // CHECK: %[[outer_range:.*]] = arith.muli
- // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
- scf.for %i = %c0 to %c10 step %c1 {
- // Check that the "j" loop was removed and that the inner loops were
- // coalesced as well. The preparation step for coalescing will inject the
- // subtraction operation unlike the IV remapping.
- // CHECK-NOT: scf.for
- // CHECK: arith.subi
- scf.for %j = %c0 to %c10 step %c1 {
- // The inner pair of loops is coalesced separately.
- // CHECK: scf.for
- scf.for %k = %i to %j step %c1 {
- // CHECK-NOT: scf.for
- scf.for %l = %i to %j step %c1 {
- "foo"() : () -> ()
- }
- }
- }
- }
- return
-}
-
-// -----
-
-// Check coalescing of affine.for loops when all the loops have constant upper bound.
-// CHECK-DAG: #[[SIXTEEN:.*]] = affine_map<() -> (16)>
-// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[EIGHT:.*]] = affine_map<() -> (8)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-func.func @coalesce_affine_for() {
- affine.for %i = 0 to 16 {
- affine.for %j = 0 to 64 {
- affine.for %k = 0 to 8 {
- "test.foo"(%i, %j, %k) : (index, index, index) -> ()
- }
- }
- }
- return
-}
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[SIXTEEN]]()
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[SIXTY_FOUR]]()
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[EIGHT]]()
-// CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]]
-// CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T3]]]
-// CHECK-DAG: %[[T6:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T3]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T6]])[%[[T1]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T6]])[%[[T1]]]
-// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
-// CHECK-NEXT: }
-// CHECK-NEXT: return
-
-// -----
-
-// Check coalescing of affine.for loops when all the loops have non constant upper bounds.
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[FLOOR:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
- %c0 = arith.constant 0 : index
- %M = memref.dim %arg0, %c0 : memref<?x?xf32>
- %N = memref.dim %arg0, %c0 : memref<?x?xf32>
- %K = memref.dim %arg0, %c0 : memref<?x?xf32>
- affine.for %i = 0 to %M {
- affine.for %j = 0 to %N {
- affine.for %k = 0 to %K {
- "test.foo"(%i, %j, %k) : (index, index, index) -> ()
- }
- }
- }
- return
-}
-// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]]
-// CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]]
-// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]]
-// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
-// CHECK-NEXT: }
-// CHECK-NEXT: return
-
-// -----
-
-// Check coalescing of affine.for loops when some of the loop has constant upper bounds while others have nin constant upper bounds.
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
- %c0 = arith.constant 0 : index
- %M = memref.dim %arg0, %c0 : memref<?x?xf32>
- %N = memref.dim %arg0, %c0 : memref<?x?xf32>
- affine.for %i = 0 to %M {
- affine.for %j = 0 to %N {
- affine.for %k = 0 to 64 {
- "test.foo"(%i, %j, %k) : (index, index, index) -> ()
- }
- }
- }
- return
-}
-// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]()
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]]
-// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]]
-// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]]
-// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
-// CHECK-NEXT: }
-// CHECK-NEXT: return
-
-// -----
-
-// Check coalescing of affine.for loops when upper bound contains multi result upper bound map.
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0, -s0)>
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
-#myMap = affine_map<()[s1] -> (s1, -s1)>
-func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
- %c0 = arith.constant 0 : index
- %M = memref.dim %arg0, %c0 : memref<?x?xf32>
- %N = memref.dim %arg0, %c0 : memref<?x?xf32>
- %K = memref.dim %arg0, %c0 : memref<?x?xf32>
- affine.for %i = 0 to min #myMap()[%M] {
- affine.for %j = 0 to %N {
- affine.for %k = 0 to %K {
- "test.foo"(%i, %j, %k) : (index, index, index) -> ()
- }
- }
- }
- return
-}
-// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]]
-// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]]
-// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]]
-// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
-// CHECK-NEXT: }
-// CHECK-NEXT: return
-
-// -----
-
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (d0 * 110)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (696, d0 * 110 + 110)>
-#map0 = affine_map<(d0) -> (d0 * 110)>
-#map1 = affine_map<(d0) -> (696, d0 * 110 + 110)>
-func.func @test_loops_do_not_get_coalesced() {
- affine.for %i = 0 to 7 {
- affine.for %j = #map0(%i) to min #map1(%i) {
- "use"(%i, %j) : (index, index) -> ()
- }
- }
- return
-}
-// CHECK: affine.for %[[IV0:.*]] = 0 to 7
-// CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]])
-// CHECK-NEXT: "use"(%[[IV0]], %[[IV1]])
-// CHECK-NEXT: }
-// CHECK-NEXT: }
-// CHECK-NEXT: return
+// func.func @unnormalized_loops() {
+// // CHECK: %[[orig_step_i:.*]] = arith.constant 2
+
+// // CHECK: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3
+// // CHECK: %[[orig_lb_i:.*]] = arith.constant 5
+// // CHECK: %[[orig_lb_j:.*]] = arith.constant 7
+// %c2 = arith.constant 2 : index
+// %c3 = arith.constant 3 : index
+// %c5 = arith.constant 5 : index
+// %c7 = arith.constant 7 : index
+// %c10 = arith.constant 10 : index
+// %c17 = arith.constant 17 : index
+
+// // Normalized lower bound and step for the outer scf.
+// // CHECK: %[[lb_i:.*]] = arith.constant 0
+// // CHECK: %[[step_i:.*]] = arith.constant 1
+
+// // Number of iterations in the inner loop, the pattern is the same as above,
+// // only capture the final result.
+// // CHECK: %[[numiter_j:.*]] = arith.constant 4
+
+// // New bounds of the outer scf.
+// // CHECK: %[[range:.*]] = arith.muli %[[orig_step_j_and_numiter_i:.*]], %[[numiter_j]]
+// // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
+// scf.for %i = %c5 to %c10 step %c2 {
+// // The inner loop has been removed.
+// // CHECK-NOT: scf.for
+// scf.for %j = %c7 to %c17 step %c3 {
+// // The IVs are rewritten.
+// // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]]
+// // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]]
+// // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j_and_numiter_i]]
+// // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb_j]]
+// // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step_i]]
+// // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb_i]]
+// // CHECK: "use"(%[[orig_i]], %[[orig_j]])
+// "use"(%i, %j) : (index, index) -> ()
+// }
+// }
+// return
+// }
+
+// func.func @noramalized_loops_with_yielded_iter_args() {
+// // CHECK: %[[orig_lb:.*]] = arith.constant 0
+// // CHECK: %[[orig_step:.*]] = arith.constant 1
+// // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
+// // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
+// // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+// %c0 = arith.constant 0 : index
+// %c1 = arith.constant 1 : index
+// %c3 = arith.constant 3 : index
+// %c42 = arith.constant 42 : index
+// %c56 = arith.constant 56 : index
+// // The range of the new scf.
+// // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
+// // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+
+// // Updated loop bounds.
+// // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
+// %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
+// // Inner loops must have been removed.
+// // CHECK-NOT: scf.for
+
+// // Reconstruct original IVs from the linearized one.
+// // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
+// // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+// %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
+// %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
+// // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+// "use"(%i, %j, %k) : (index, index, index) -> ()
+// // CHECK: scf.yield %[[VAL_1]] : index
+// scf.yield %arg2 : index
+// }
+// scf.yield %0#0 : index
+// }
+// scf.yield %1#0 : index
+// }
+// return
+// }
+
+// func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
+// // CHECK: %[[orig_lb:.*]] = arith.constant 0
+// // CHECK: %[[orig_step:.*]] = arith.constant 1
+// // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
+// // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
+// // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+// %c0 = arith.constant 0 : index
+// %c1 = arith.constant 1 : index
+// %c3 = arith.constant 3 : index
+// %c42 = arith.constant 42 : index
+// %c56 = arith.constant 56 : index
+// // The range (total iteration count) of the new coalesced scf.for loop.
+// // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
+// // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+
+// // Updated loop bounds.
+// // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) {
+// %2:2 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0, %arg1 = %c0) -> (index, index) {
+// // Inner loops must have been removed.
+// // CHECK-NOT: scf.for
+
+// // Reconstruct original IVs from the linearized one.
+// // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
+// // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+// %1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){
+// %0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) {
+// // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+// "use"(%i, %j, %k) : (index, index, index) -> ()
+// // CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index
+// scf.yield %arg5, %arg4 : index, index
+// }
+// scf.yield %0#0, %0#1 : index, index
+// }
+// scf.yield %1#0, %1#1 : index, index
+// }
+// return
+// }
+
+// func.func @noramalized_loops_with_yielded_non_iter_args() {
+// // CHECK: %[[orig_lb:.*]] = arith.constant 0
+// // CHECK: %[[orig_step:.*]] = arith.constant 1
+// // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
+// // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
+// // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+// %c0 = arith.constant 0 : index
+// %c1 = arith.constant 1 : index
+// %c3 = arith.constant 3 : index
+// %c42 = arith.constant 42 : index
+// %c56 = arith.constant 56 : index
+// // The range (total iteration count) of the new coalesced scf.for loop.
+// // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
+// // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+
+// // Updated loop bounds.
+// // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
+// %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
+// // Inner loops must have been removed.
+// // CHECK-NOT: scf.for
+
+// // Reconstruct original IVs from the linearized one.
+// // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
+// // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
+// // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+// %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
+// %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
+// // CHECK: %[[res:.*]] = "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+// %res = "use"(%i, %j, %k) : (index, index, index) -> (index)
+// // CHECK: scf.yield %[[res]] : index
+// scf.yield %res : index
+// }
+// scf.yield %0#0 : index
+// }
+// scf.yield %1#0 : index
+// }
+// return
+// }
+
+// // Check with parametric loop bounds and steps, capture the bounds here.
+// // CHECK-LABEL: @parametric
+// // CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]:
+// // CHECK-SAME: %[[orig_ub1:[A-Za-z0-9]+]]:
+// // CHECK-SAME: %[[orig_step1:[A-Za-z0-9]+]]:
+// // CHECK-SAME: %[[orig_lb2:[A-Za-z0-9]+]]:
+// // CHECK-SAME: %[[orig_ub2:[A-Za-z0-9]+]]:
+// // CHECK-SAME: %[[orig_step2:[A-Za-z0-9]+]]:
+// func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
+// %lb2 : index, %ub2 : index, %step2 : index) {
+// // Compute the number of iterations for each of the loops and the total
+// // number of iterations.
+// // CHECK: %[[range1:.*]] = arith.subi %[[orig_ub1]], %[[orig_lb1]]
+// // CHECK: %[[numiter1:.*]] = arith.ceildivsi %[[range1]], %[[orig_step1]]
+// // CHECK: %[[range2:.*]] = arith.subi %[[orig_ub2]], %[[orig_lb2]]
+// // CHECK: %[[numiter2:.*]] = arith.ceildivsi %[[range2]], %[[orig_step2]]
+// // CHECK: %[[range:.*]] = arith.muli %[[numiter1]], %[[numiter2]] : index
+
+// // Check that the outer loop is updated.
+// // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
+// scf.for %i = %lb1 to %ub1 step %step1 {
+// // Check that the inner loop is removed.
+// // CHECK-NOT: scf.for
+// scf.for %j = %lb2 to %ub2 step %step2 {
+// // Remapping of the induction variables.
+// // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter2]] : index
+// // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter2]] : index
+// // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step2]]
+// // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb2]]
+// // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step1]]
+// // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb1]]
+
+// // CHECK: "foo"(%[[orig_i]], %[[orig_j]])
+// "foo"(%i, %j) : (index, index) -> ()
+// }
+// }
+// return
+// }
+
+// // CHECK-LABEL: @two_bands
+// func.func @two_bands() {
+// %c0 = arith.constant 0 : index
+// %c1 = arith.constant 1 : index
+// %c10 = arith.constant 10 : index
+// // CHECK: %[[outer_range:.*]] = arith.muli
+// // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
+// scf.for %i = %c0 to %c10 step %c1 {
+// // Check that the "j" loop was removed and that the inner loops were
+// // coalesced as well. The preparation step for coalescing will inject the
+// // subtraction operation unlike the IV remapping.
+// // CHECK-NOT: scf.for
+// // CHECK: arith.subi
+// scf.for %j = %c0 to %c10 step %c1 {
+// // The inner pair of loops is coalesced separately.
+// // CHECK: scf.for
+// scf.for %k = %i to %j step %c1 {
+// // CHECK-NOT: scf.for
+// scf.for %l = %i to %j step %c1 {
+// "foo"() : () -> ()
+// }
+// }
+// }
+// }
+// return
+// }
+
+// // -----
+
+// // Check coalescing of affine.for loops when all the loops have constant upper bounds.
+// // CHECK-DAG: #[[SIXTEEN:.*]] = affine_map<() -> (16)>
+// // CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
+// // CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// // CHECK-DAG: #[[EIGHT:.*]] = affine_map<() -> (8)>
+// // CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+// // CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// func.func @coalesce_affine_for() {
+// affine.for %i = 0 to 16 {
+// affine.for %j = 0 to 64 {
+// affine.for %k = 0 to 8 {
+// "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+// }
+// }
+// }
+// return
+// }
+// // CHECK-DAG: %[[T0:.*]] = affine.apply #[[SIXTEEN]]()
+// // CHECK-DAG: %[[T1:.*]] = affine.apply #[[SIXTY_FOUR]]()
+// // CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
+// // CHECK-DAG: %[[T3:.*]] = affine.apply #[[EIGHT]]()
+// // CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]]
+// // CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]]
+// // CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T3]]]
+// // CHECK-DAG: %[[T6:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T3]]]
+// // CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T6]])[%[[T1]]]
+// // CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T6]])[%[[T1]]]
+// // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+// // CHECK-NEXT: }
+// // CHECK-NEXT: return
+
+// // -----
+
+// // Check coalescing of affine.for loops when all the loops have non constant upper bounds.
+// // CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
+// // CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// // CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+// // CHECK-DAG: #[[FLOOR:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
+// %c0 = arith.constant 0 : index
+// %M = memref.dim %arg0, %c0 : memref<?x?xf32>
+// %N = memref.dim %arg0, %c0 : memref<?x?xf32>
+// %K = memref.dim %arg0, %c0 : memref<?x?xf32>
+// affine.for %i = 0 to %M {
+// affine.for %j = 0 to %N {
+// affine.for %k = 0 to %K {
+// "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+// }
+// }
+// }
+// return
+// }
+// // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
+// // CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
+// // CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
+// // CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]]
+// // CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]]
+// // CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]]
+// // CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]]
+// // CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]]
+// // CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]]
+// // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+// // CHECK-NEXT: }
+// // CHECK-NEXT: return
+
+// // -----
+
+// // Check coalescing of affine.for loops when some of the loops have constant upper bounds while others have non-constant upper bounds.
+// // CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
+// // CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// // CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
+// // CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+// // CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
+// %c0 = arith.constant 0 : index
+// %M = memref.dim %arg0, %c0 : memref<?x?xf32>
+// %N = memref.dim %arg0, %c0 : memref<?x?xf32>
+// affine.for %i = 0 to %M {
+// affine.for %j = 0 to %N {
+// affine.for %k = 0 to 64 {
+// "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+// }
+// }
+// }
+// return
+// }
+// // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
+// // CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
+// // CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
+// // CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]()
+// // CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]]
+// // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
+// // CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]]
+// // CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]]
+// // CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]]
+// // CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]]
+// // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+// // CHECK-NEXT: }
+// // CHECK-NEXT: return
+
+// // -----
+
+// // Check coalescing of affine.for loops when an upper bound is given by a multi-result affine map.
+// // CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0, -s0)>
+// // CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
+// // CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// // CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+// // CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// #myMap = affine_map<()[s1] -> (s1, -s1)>
+// func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
+// %c0 = arith.constant 0 : index
+// %M = memref.dim %arg0, %c0 : memref<?x?xf32>
+// %N = memref.dim %arg0, %c0 : memref<?x?xf32>
+// %K = memref.dim %arg0, %c0 : memref<?x?xf32>
+// affine.for %i = 0 to min #myMap()[%M] {
+// affine.for %j = 0 to %N {
+// affine.for %k = 0 to %K {
+// "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+// }
+// }
+// }
+// return
+// }
+// // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
+// // CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]]
+// // CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
+// // CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
+// // CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]]
+// // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
+// // CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]]
+// // CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]]
+// // CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]]
+// // CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]]
+// // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+// // CHECK-NEXT: }
+// // CHECK-NEXT: return
+
+// // -----
+
+// // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (d0 * 110)>
+// // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (696, d0 * 110 + 110)>
+// #map0 = affine_map<(d0) -> (d0 * 110)>
+// #map1 = affine_map<(d0) -> (696, d0 * 110 + 110)>
+// func.func @test_loops_do_not_get_coalesced() {
+// affine.for %i = 0 to 7 {
+// affine.for %j = #map0(%i) to min #map1(%i) {
+// "use"(%i, %j) : (index, index) -> ()
+// }
+// }
+// return
+// }
+// // CHECK: affine.for %[[IV0:.*]] = 0 to 7
+// // CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]])
+// // CHECK-NEXT: "use"(%[[IV0]], %[[IV1]])
+// // CHECK-NEXT: }
+// // CHECK-NEXT: }
+// // CHECK-NEXT: return
More information about the Mlir-commits
mailing list