[Mlir-commits] [mlir] [mlir][SCF] Use Affine ops for indexing math. (PR #108450)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Sep 12 23:27:14 PDT 2024
https://github.com/MaheshRavishankar updated https://github.com/llvm/llvm-project/pull/108450
>From 12811be4db909fe4c040405343fb089c9594175e Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <mahesh.ravishankar at gmail.com>
Date: Thu, 12 Sep 2024 13:37:02 -0700
Subject: [PATCH] [mlir][SCF] Use Affine ops for indexing math.
For induction variables of index type, the indexing math is better
represented using affine ops such as `affine.delinearize_index`.
This also further demonstrates that some of these `affine` ops might
need to move to a different dialect. For one, these ops only support
`IndexType` when they should be able to work with any integer type.
Signed-off-by: MaheshRavishankar <mahesh.ravishankar at gmail.com>
---
mlir/include/mlir/Dialect/Affine/Passes.td | 2 +-
.../mlir/Dialect/SCF/Transforms/Passes.td | 1 +
.../SCF/Transforms/ParallelLoopCollapsing.cpp | 1 +
mlir/lib/Dialect/SCF/Utils/Utils.cpp | 75 ++++-
mlir/test/Dialect/Affine/loop-coalescing.mlir | 262 ++++++++----------
.../Dialect/SCF/transform-op-coalesce.mlir | 73 ++---
.../Transforms/parallel-loop-collapsing.mlir | 7 +-
.../single-parallel-loop-collapsing.mlir | 15 +-
8 files changed, 230 insertions(+), 206 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 1036e93a039240..b08e803345f76e 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -394,7 +394,7 @@ def LoopCoalescing : Pass<"affine-loop-coalescing", "func::FuncOp"> {
let summary = "Coalesce nested loops with independent bounds into a single "
"loop";
let constructor = "mlir::affine::createLoopCoalescingPass()";
- let dependentDialects = ["arith::ArithDialect"];
+ let dependentDialects = ["affine::AffineDialect","arith::ArithDialect"];
}
def SimplifyAffineStructures : Pass<"affine-simplify-structures", "func::FuncOp"> {
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 9b29affb97c432..53d1ae10dc87d8 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -56,6 +56,7 @@ def SCFParallelLoopFusion : Pass<"scf-parallel-loop-fusion"> {
def TestSCFParallelLoopCollapsing : Pass<"test-scf-parallel-loop-collapsing"> {
let summary = "Test parallel loops collapsing transformation";
let constructor = "mlir::createTestSCFParallelLoopCollapsingPass()";
+ let dependentDialects = ["affine::AffineDialect"];
let description = [{
This pass is purely for testing the scf::collapseParallelLoops
transformation. The transformation does not have opinions on how a
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 6ba7020e86fa67..358a3b38a4cd32 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -8,6 +8,7 @@
#include "mlir/Dialect/SCF/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Transforms/RegionUtils.h"
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index a794a121d6267b..2b643893ef46d2 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -12,6 +12,7 @@
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -671,9 +672,26 @@ LogicalResult mlir::loopUnrollJamByFactor(scf::ForOp forOp,
return success();
}
+Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc,
+ OpFoldResult lb, OpFoldResult ub,
+ OpFoldResult step) {
+ Range normalizedLoopBounds;
+ normalizedLoopBounds.offset = rewriter.getIndexAttr(0);
+ normalizedLoopBounds.stride = rewriter.getIndexAttr(1);
+ AffineExpr s0, s1, s2;
+ bindSymbols(rewriter.getContext(), s0, s1, s2);
+ AffineExpr e = (s1 - s0).ceilDiv(s2);
+ normalizedLoopBounds.size =
+ affine::makeComposedFoldedAffineApply(rewriter, loc, e, {lb, ub, step});
+ return normalizedLoopBounds;
+}
+
Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
OpFoldResult lb, OpFoldResult ub,
OpFoldResult step) {
+ if (getType(lb) == rewriter.getIndexType()) {
+ return emitNormalizedLoopBoundsForIndexType(rewriter, loc, lb, ub, step);
+ }
// For non-index types, generate `arith` instructions
// Check if the loop is already known to have a constant zero lower bound or
// a constant one step.
@@ -714,9 +732,35 @@ Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc,
return {newLowerBound, newUpperBound, newStep};
}
+static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter,
+ Location loc,
+ Value normalizedIv,
+ OpFoldResult origLb,
+ OpFoldResult origStep) {
+ AffineExpr d0, s0, s1;
+ bindSymbols(rewriter.getContext(), s0, s1);
+ bindDims(rewriter.getContext(), d0);
+ AffineExpr e = d0 * s1 + s0;
+ OpFoldResult denormalizedIv = affine::makeComposedFoldedAffineApply(
+ rewriter, loc, e, ArrayRef<OpFoldResult>{normalizedIv, origLb, origStep});
+ Value denormalizedIvVal =
+ getValueOrCreateConstantIndexOp(rewriter, loc, denormalizedIv);
+ SmallPtrSet<Operation *, 1> preservedUses;
+ if (!isConstantIntValue(origLb, 0) || !isConstantIntValue(origStep, 1)) {
+ if (Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
+ preservedUses.insert(preservedUse);
+ }
+ }
+ rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIvVal, preservedUses);
+}
+
void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
Value normalizedIv, OpFoldResult origLb,
OpFoldResult origStep) {
+ if (getType(origLb) == rewriter.getIndexType()) {
+ return denormalizeInductionVariableForIndexType(rewriter, loc, normalizedIv,
+ origLb, origStep);
+ }
Value denormalizedIv;
SmallPtrSet<Operation *, 2> preserve;
bool isStepOne = isConstantIntValue(origStep, 1);
@@ -739,10 +783,29 @@ void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc,
rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIv, preserve);
}
+static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc,
+ ArrayRef<OpFoldResult> values) {
+ assert(!values.empty() && "unexecpted empty array");
+ AffineExpr s0, s1;
+ bindSymbols(rewriter.getContext(), s0, s1);
+ AffineExpr mul = s0 * s1;
+ OpFoldResult products = rewriter.getIndexAttr(1);
+ for (auto v : values) {
+ products = affine::makeComposedFoldedAffineApply(
+ rewriter, loc, mul, ArrayRef<OpFoldResult>{products, v});
+ }
+ return products;
+}
+
/// Helper function to multiply a sequence of values.
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
ArrayRef<Value> values) {
assert(!values.empty() && "unexpected empty list");
+ if (getType(values.front()) == rewriter.getIndexType()) {
+ SmallVector<OpFoldResult> ofrs = getAsOpFoldResult(values);
+ OpFoldResult product = getProductOfIndexes(rewriter, loc, ofrs);
+ return getValueOrCreateConstantIndexOp(rewriter, loc, product);
+ }
std::optional<Value> productOf;
for (auto v : values) {
auto vOne = getConstantIntValue(v);
@@ -757,7 +820,7 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
if (!productOf) {
productOf = rewriter
.create<arith::ConstantOp>(
- loc, rewriter.getOneAttr(values.front().getType()))
+ loc, rewriter.getOneAttr(getType(values.front())))
.getResult();
}
return productOf.value();
@@ -774,6 +837,16 @@ static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc,
static std::pair<SmallVector<Value>, SmallPtrSet<Operation *, 2>>
delinearizeInductionVariable(RewriterBase &rewriter, Location loc,
Value linearizedIv, ArrayRef<Value> ubs) {
+
+ if (linearizedIv.getType() == rewriter.getIndexType()) {
+ Operation *delinearizedOp =
+ rewriter.create<affine::AffineDelinearizeIndexOp>(loc, linearizedIv,
+ ubs);
+ auto resultVals = llvm::map_to_vector(
+ delinearizedOp->getResults(), [](OpResult r) -> Value { return r; });
+ return {resultVals, SmallPtrSet<Operation *, 2>{delinearizedOp}};
+ }
+
SmallVector<Value> delinearizedIvs(ubs.size());
SmallPtrSet<Operation *, 2> preservedUsers;
diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir
index 45dd299295f640..f6e7b21bc66aba 100644
--- a/mlir/test/Dialect/Affine/loop-coalescing.mlir
+++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir
@@ -1,14 +1,15 @@
-// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse --mlir-print-local-scope %s | FileCheck %s
// CHECK-LABEL: @one_3d_nest
func.func @one_3d_nest() {
// Capture original bounds. Note that for zero-based step-one loops, the
// upper bound is also the number of iterations.
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
+ // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
+ // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3
+ // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42
+ // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[range:.*]] = arith.constant 7056
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -16,9 +17,6 @@ func.func @one_3d_nest() {
%c42 = arith.constant 42 : index
%c56 = arith.constant 56 : index
// The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
-
// Updated loop bounds.
// CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
scf.for %i = %c0 to %c42 step %c1 {
@@ -26,13 +24,11 @@ func.func @one_3d_nest() {
// CHECK-NOT: scf.for
// Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+ // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
+ // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]])
scf.for %j = %c0 to %c56 step %c1 {
scf.for %k = %c0 to %c3 step %c1 {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+ // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
"use"(%i, %j, %k) : (index, index, index) -> ()
}
}
@@ -40,6 +36,8 @@ func.func @one_3d_nest() {
return
}
+// -----
+
// Check that there is no chasing the replacement of value uses by ensuring
// multiple uses of loop induction variables get rewritten to the same values.
@@ -52,13 +50,10 @@ func.func @multi_use() {
scf.for %i = %c1 to %c10 step %c1 {
scf.for %j = %c1 to %c10 step %c1 {
scf.for %k = %c1 to %c10 step %c1 {
- // CHECK: %[[k_unshifted:.*]] = arith.remsi %[[iv]], %[[k_extent:.*]]
- // CHECK: %[[ij:.*]] = arith.divsi %[[iv]], %[[k_extent]]
- // CHECK: %[[j_unshifted:.*]] = arith.remsi %[[ij]], %[[j_extent:.*]]
- // CHECK: %[[i_unshifted:.*]] = arith.divsi %[[ij]], %[[j_extent]]
- // CHECK: %[[k:.*]] = arith.addi %[[k_unshifted]]
- // CHECK: %[[j:.*]] = arith.addi %[[j_unshifted]]
- // CHECK: %[[i:.*]] = arith.addi %[[i_unshifted]]
+ // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[iv]]
+ // CHECK: %[[k:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#2)
+ // CHECK: %[[j:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#1)
+ // CHECK: %[[i:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#0)
// CHECK: "use1"(%[[i]], %[[j]], %[[k]])
"use1"(%i,%j,%k) : (index,index,index) -> ()
@@ -72,12 +67,20 @@ func.func @multi_use() {
return
}
+// -----
+
func.func @unnormalized_loops() {
- // CHECK: %[[orig_step_i:.*]] = arith.constant 2
+ // Normalized lower bound and step for the outer scf.
+ // CHECK-DAG: %[[lb_i:.*]] = arith.constant 0
+ // CHECK-DAG: %[[step_i:.*]] = arith.constant 1
+ // CHECK-DAG: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3
+
+ // Number of iterations in the inner loop, the pattern is the same as above,
+ // only capture the final result.
+ // CHECK-DAG: %[[numiter_j:.*]] = arith.constant 4
+
+ // CHECK-DAG: %[[range:.*]] = arith.constant 12
- // CHECK: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3
- // CHECK: %[[orig_lb_i:.*]] = arith.constant 5
- // CHECK: %[[orig_lb_j:.*]] = arith.constant 7
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%c5 = arith.constant 5 : index
@@ -85,28 +88,18 @@ func.func @unnormalized_loops() {
%c10 = arith.constant 10 : index
%c17 = arith.constant 17 : index
- // Normalized lower bound and step for the outer scf.
- // CHECK: %[[lb_i:.*]] = arith.constant 0
- // CHECK: %[[step_i:.*]] = arith.constant 1
-
- // Number of iterations in the inner loop, the pattern is the same as above,
- // only capture the final result.
- // CHECK: %[[numiter_j:.*]] = arith.constant 4
// New bounds of the outer scf.
- // CHECK: %[[range:.*]] = arith.muli %[[orig_step_j_and_numiter_i:.*]], %[[numiter_j]]
// CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
scf.for %i = %c5 to %c10 step %c2 {
// The inner loop has been removed.
// CHECK-NOT: scf.for
scf.for %j = %c7 to %c17 step %c3 {
// The IVs are rewritten.
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]]
- // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]]
- // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j_and_numiter_i]]
- // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb_j]]
- // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step_i]]
- // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb_i]]
+ // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]]
+ // CHECK-SAME: into (%[[orig_step_j_and_numiter_i]], %[[numiter_j]])
+ // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 7)>(%[[delinearize]]#1)
+ // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0) -> (d0 * 2 + 5)>(%[[delinearize]]#0)
// CHECK: "use"(%[[orig_i]], %[[orig_j]])
"use"(%i, %j) : (index, index) -> ()
}
@@ -114,20 +107,21 @@ func.func @unnormalized_loops() {
return
}
+// -----
+
func.func @noramalized_loops_with_yielded_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
+ // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42
+ // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
+ // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3
+ // CHECK-DAG: %[[range:.*]] = arith.constant 7056
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c42 = arith.constant 42 : index
%c56 = arith.constant 56 : index
// The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
// Updated loop bounds.
// CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
@@ -136,13 +130,10 @@ func.func @noramalized_loops_with_yielded_iter_args() {
// CHECK-NOT: scf.for
// Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+ // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]])
%1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
%0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+ // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
"use"(%i, %j, %k) : (index, index, index) -> ()
// CHECK: scf.yield %[[VAL_1]] : index
scf.yield %arg2 : index
@@ -154,20 +145,21 @@ func.func @noramalized_loops_with_yielded_iter_args() {
return
}
+// -----
+
func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
+ // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
+ // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3
+ // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42
+ // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c42 = arith.constant 42 : index
%c56 = arith.constant 56 : index
// The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+ // CHECK-DAG:%[[range:.*]] = arith.constant 7056
// Updated loop bounds.
// CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) {
@@ -176,13 +168,11 @@ func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
// CHECK-NOT: scf.for
// Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+ // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
+ // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]])
%1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){
%0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) {
- // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+ // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
"use"(%i, %j, %k) : (index, index, index) -> ()
// CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index
scf.yield %arg5, %arg4 : index, index
@@ -194,20 +184,21 @@ func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
return
}
+// -----
+
func.func @noramalized_loops_with_yielded_non_iter_args() {
- // CHECK: %[[orig_lb:.*]] = arith.constant 0
- // CHECK: %[[orig_step:.*]] = arith.constant 1
- // CHECK: %[[orig_ub_k:.*]] = arith.constant 3
- // CHECK: %[[orig_ub_i:.*]] = arith.constant 42
- // CHECK: %[[orig_ub_j:.*]] = arith.constant 56
+ // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
+ // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
+ // CHECK-DAG: %[[orig_ub_k:.*]] = arith.constant 3
+ // CHECK-DAG: %[[orig_ub_i:.*]] = arith.constant 42
+ // CHECK-DAG: %[[orig_ub_j:.*]] = arith.constant 56
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c42 = arith.constant 42 : index
%c56 = arith.constant 56 : index
// The range of the new scf.
- // CHECK: %[[partial_range:.*]] = arith.muli %[[orig_ub_i]], %[[orig_ub_j]]
- // CHECK-NEXT:%[[range:.*]] = arith.muli %[[partial_range]], %[[orig_ub_k]]
+ // CHECK-DAG: %[[range:.*]] = arith.constant 7056
// Updated loop bounds.
// CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
@@ -216,13 +207,11 @@ func.func @noramalized_loops_with_yielded_non_iter_args() {
// CHECK-NOT: scf.for
// Reconstruct original IVs from the linearized one.
- // CHECK: %[[orig_k:.*]] = arith.remsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[div:.*]] = arith.divsi %[[i]], %[[orig_ub_k]]
- // CHECK: %[[orig_j:.*]] = arith.remsi %[[div]], %[[orig_ub_j]]
- // CHECK: %[[orig_i:.*]] = arith.divsi %[[div]], %[[orig_ub_j]]
+ // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
+ // CHECK-SAME: into (%[[orig_ub_i]], %[[orig_ub_j]], %[[orig_ub_k]])
%1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
%0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
- // CHECK: %[[res:.*]] = "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
+ // CHECK: %[[res:.*]] = "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
%res = "use"(%i, %j, %k) : (index, index, index) -> (index)
// CHECK: scf.yield %[[res]] : index
scf.yield %res : index
@@ -234,6 +223,8 @@ func.func @noramalized_loops_with_yielded_non_iter_args() {
return
}
+// -----
+
// Check with parametric loop bounds and steps, capture the bounds here.
// CHECK-LABEL: @parametric
// CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]:
@@ -246,25 +237,28 @@ func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
%lb2 : index, %ub2 : index, %step2 : index) {
// Compute the number of iterations for each of the loops and the total
// number of iterations.
- // CHECK: %[[range1:.*]] = arith.subi %[[orig_ub1]], %[[orig_lb1]]
- // CHECK: %[[numiter1:.*]] = arith.ceildivsi %[[range1]], %[[orig_step1]]
- // CHECK: %[[range2:.*]] = arith.subi %[[orig_ub2]], %[[orig_lb2]]
- // CHECK: %[[numiter2:.*]] = arith.ceildivsi %[[range2]], %[[orig_step2]]
- // CHECK: %[[range:.*]] = arith.muli %[[numiter1]], %[[numiter2]] : index
+ // CHECK: %[[normalized_i:.*]] = affine.apply
+ // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]]]
+ // CHECK: %[[c0:.+]] = arith.constant 0
+ // CHECK: %[[c1:.+]] = arith.constant 1
+ // CHECK: %[[normalized_j:.*]] = affine.apply
+ // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]]
+ // CHECK: %[[range:.+]] = affine.apply
+ // CHECK-SAME: affine_map<()[s0, s1, s2, s3, s4, s5] -> (((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5))>()
+ // CHECK-SAME: [%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]], %[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]]
// Check that the outer loop is updated.
- // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
+ // CHECK: scf.for %[[i:.*]] = %[[c0]] to %[[range]] step %[[c1]]
scf.for %i = %lb1 to %ub1 step %step1 {
// Check that the inner loop is removed.
// CHECK-NOT: scf.for
scf.for %j = %lb2 to %ub2 step %step2 {
// Remapping of the induction variables.
- // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter2]] : index
- // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter2]] : index
- // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step2]]
- // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb2]]
- // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step1]]
- // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb1]]
+ // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] into (%[[normalized_i]], %[[normalized_j]])
+ // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+ // CHECK-SAME: (%[[delinearize]]#1)[%[[orig_lb2]], %[[orig_step2]]]
+ // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+ // CHECK-SAME: (%[[delinearize]]#0)[%[[orig_lb1]], %[[orig_step1]]]
// CHECK: "foo"(%[[orig_i]], %[[orig_j]])
"foo"(%i, %j) : (index, index) -> ()
@@ -273,19 +267,21 @@ func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
return
}
+// -----
+
// CHECK-LABEL: @two_bands
func.func @two_bands() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
- // CHECK: %[[outer_range:.*]] = arith.muli
+ // CHECK: %[[outer_range:.*]] = arith.constant 100
// CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
scf.for %i = %c0 to %c10 step %c1 {
// Check that the "j" loop was removed and that the inner loops were
// coalesced as well. The preparation step for coalescing will inject the
// subtraction operation unlike the IV remapping.
// CHECK-NOT: scf.for
- // CHECK: arith.subi
+ // CHECK: affine.delinearize_index
scf.for %j = %c0 to %c10 step %c1 {
// The inner pair of loops is coalesced separately.
// CHECK: scf.for
@@ -303,12 +299,6 @@ func.func @two_bands() {
// -----
// Check coalescing of affine.for loops when all the loops have constant upper bound.
-// CHECK-DAG: #[[SIXTEEN:.*]] = affine_map<() -> (16)>
-// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[EIGHT:.*]] = affine_map<() -> (8)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
func.func @coalesce_affine_for() {
affine.for %i = 0 to 16 {
affine.for %j = 0 to 64 {
@@ -319,16 +309,16 @@ func.func @coalesce_affine_for() {
}
return
}
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[SIXTEEN]]()
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[SIXTY_FOUR]]()
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[EIGHT]]()
-// CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]]
+// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<() -> (16)>()
+// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<() -> (64)>()
+// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]]
+// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<() -> (8)>()
+// CHECK-DAG: %[[T4:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T3]]]
// CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T3]]]
-// CHECK-DAG: %[[T6:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T3]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T6]])[%[[T1]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T6]])[%[[T1]]]
+// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T3]]]
+// CHECK-DAG: %[[T6:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T3]]]
+// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T6]])[%[[T1]]]
+// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T6]])[%[[T1]]]
// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -336,10 +326,6 @@ func.func @coalesce_affine_for() {
// -----
// Check coalescing of affine.for loops when all the loops have non constant upper bounds.
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[FLOOR:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%M = memref.dim %arg0, %c0 : memref<?x?xf32>
@@ -355,14 +341,14 @@ func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
return
}
// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]]
+// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
+// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
+// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T0]]]
// CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]]
-// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]]
+// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T0]]]
+// CHECK-DAG: %[[T9:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T0]]]
+// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T9]])[%[[T0]]]
+// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T9]])[%[[T0]]]
// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -370,11 +356,6 @@ func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
// -----
// Check coalescing of affine.for loops when some of the loop has constant upper bounds while others have nin constant upper bounds.
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[SIXTY_FOUR:.*]] = affine_map<() -> (64)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%M = memref.dim %arg0, %c0 : memref<?x?xf32>
@@ -389,15 +370,15 @@ func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
return
}
// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]()
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]]
+// CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
+// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
+// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<() -> (64)>()
+// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T2]]]
// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]]
-// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]]
+// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T2]]]
+// CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T2]]]
+// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T0]]]
+// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T0]]]
// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -405,11 +386,6 @@ func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
// -----
// Check coalescing of affine.for loops when upper bound contains multi result upper bound map.
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0, -s0)>
-// CHECK-DAG: #[[IDENTITY:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-DAG: #[[PRODUCT:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MOD:.*]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[DIV:.*]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
#myMap = affine_map<()[s1] -> (s1, -s1)>
func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
@@ -426,23 +402,21 @@ func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
return
}
// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
-// CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]]
-// CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]]
-// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]]
-// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]]
+// CHECK-DAG: %[[T0:.*]] = affine.min affine_map<()[s0] -> (s0, -s0)>()[%[[DIM]]]
+// CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
+// CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]]
+// CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T1]]]
// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
-// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]]
-// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]]
-// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]]
-// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]]
+// CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T1]]]
+// CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T1]]]
+// CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T1]]]
+// CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T1]]]
// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
// CHECK-NEXT: }
// CHECK-NEXT: return
// -----
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (d0 * 110)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (696, d0 * 110 + 110)>
#map0 = affine_map<(d0) -> (d0 * 110)>
#map1 = affine_map<(d0) -> (696, d0 * 110 + 110)>
func.func @test_loops_do_not_get_coalesced() {
@@ -454,7 +428,7 @@ func.func @test_loops_do_not_get_coalesced() {
return
}
// CHECK: affine.for %[[IV0:.*]] = 0 to 7
-// CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]])
+// CHECK-NEXT: affine.for %[[IV1:.*]] = affine_map<(d0) -> (d0 * 110)>(%[[IV0]]) to min affine_map<(d0) -> (696, d0 * 110 + 110)>(%[[IV0]])
// CHECK-NEXT: "use"(%[[IV0]], %[[IV1]])
// CHECK-NEXT: }
// CHECK-NEXT: }
diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
index 6fcd727621bae9..1c405a47950fcc 100644
--- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
+++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse | FileCheck %s
+// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse --mlir-print-local-scope | FileCheck %s
func.func @coalesce_inner() {
%c0 = arith.constant 0 : index
@@ -33,19 +33,15 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-DAG: #[[MAP:.+]] = affine_map<() -> (64)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * s0)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} {
- // CHECK: %[[T0:.+]] = affine.apply #[[MAP]]()
- // CHECK: %[[UB:.+]] = affine.apply #[[MAP1]](%[[T0]])[%[[T0]]]
+ // CHECK: %[[T0:.+]] = affine.apply affine_map<() -> (64)>()
+ // CHECK: %[[UB:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
// CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB:.+]] {
// CHECK-NOT: affine.for %[[IV2:.+]]
affine.for %arg4 = 0 to 64 {
affine.for %arg5 = 0 to 64 {
- // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP2]](%[[IV1]])[%{{.+}}]
- // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP3]](%[[IV1]])[%{{.+}}]
+ // CHECK: %[[IDX0:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV1]])[%{{.+}}]
+ // CHECK: %[[IDX1:.+]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV1]])[%{{.+}}]
// CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1>
%0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
%1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
@@ -76,9 +72,8 @@ func.func @coalesce_and_unroll(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64x
scf.for %arg4 = %c0 to %c64 step %c1 {
// CHECK-NOT: scf.for
scf.for %arg5 = %c0 to %c64 step %c1 {
- // CHECK: %[[IDX0:.+]] = arith.remsi %[[IV1]]
- // CHECK: %[[IDX1:.+]] = arith.divsi %[[IV1]]
- // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1>
+ // CHECK: %[[IDX:.+]]:2 = affine.delinearize_index
+ // CHECK-NEXT: %{{.+}} = memref.load %{{.+}}[%[[IDX]]#0, %[[IDX]]#1] : memref<64x64xf32, 1>
%0 = memref.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1>
%1 = memref.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1>
%2 = arith.addf %0, %1 : f32
@@ -138,27 +133,22 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
-// CHECK: %[[NEWUB0_DIFF:.+]] = arith.subi %[[UB0]], %[[LB0]]
-// CHECK-DAG: %[[NEWUB0:.+]] = arith.ceildivsi %[[NEWUB0_DIFF]], %[[STEP0]]
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1
-// CHECK: %[[NEWUB1_DIFF:.+]] = arith.subi %[[UB1]], %[[LB1]]
-// CHECK-DAG: %[[NEWUB1:.+]] = arith.ceildivsi %[[NEWUB1_DIFF]], %[[STEP1]]
-// CHECK: %[[NEWUB2_DIFF:.+]] = arith.subi %[[UB2]], %[[LB2]]
-// CHECK-DAG: %[[NEWUB2:.+]] = arith.ceildivsi %[[NEWUB2_DIFF]], %[[STEP2]]
-// CHECK: %[[PROD1:.+]] = arith.muli %[[NEWUB0]], %[[NEWUB1]]
-// CHECK: %[[NEWUB:.+]] = arith.muli %[[PROD1]], %[[NEWUB2]]
+// CHECK: %[[NITERS0:.+]] = affine.apply
+// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB0]], %[[UB0]], %[[STEP0]]]
+// CHECK: %[[C0:.+]] = arith.constant 0 : index
+// CHECK: %[[C1:.+]] = arith.constant 1 : index
+// CHECK: %[[NITERS1:.+]] = affine.apply
+// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB1]], %[[UB1]], %[[STEP1]]]
+// CHECK: %[[NITERS2:.+]] = affine.apply
+// CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[LB2]], %[[UB2]], %[[STEP2]]]
+// CHECK: %[[NEWUB:.+]] = affine.apply affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8] ->
+// CHECK-SAME: ((((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5)) * ((-s6 + s7) ceildiv s8))
+// CHECK-SAME: [%[[LB0]], %[[UB0]], %[[STEP0]], %[[LB1]], %[[UB1]], %[[STEP1]], %[[LB2]], %[[UB2]], %[[STEP2]]]
// CHECK: %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]])
-// CHECK: %[[IV2:.+]] = arith.remsi %[[IV]], %[[NEWUB2]]
-// CHECK: %[[PREVIOUS:.+]] = arith.divsi %[[IV]], %[[NEWUB2]]
-// CHECK: %[[IV1:.+]] = arith.remsi %[[PREVIOUS]], %[[NEWUB1]]
-// CHECK: %[[IV0:.+]] = arith.divsi %[[PREVIOUS]], %[[NEWUB1]]
-// CHECK: %[[K_STEP:.+]] = arith.muli %[[IV2]], %[[STEP2]]
-// CHECK: %[[K:.+]] = arith.addi %[[K_STEP]], %[[LB2]]
-// CHECK: %[[J_STEP:.+]] = arith.muli %[[IV1]], %[[STEP1]]
-// CHECK: %[[J:.+]] = arith.addi %[[J_STEP]], %[[LB1]]
-// CHECK: %[[I_STEP:.+]] = arith.muli %[[IV0]], %[[STEP0]]
-// CHECK: %[[I:.+]] = arith.addi %[[I_STEP]], %[[LB0]]
+// CHECK: %[[DELINEARIZE:.+]]:3 = affine.delinearize_index %[[IV]] into (%[[NITERS0]], %[[NITERS1]], %[[NITERS2]])
+// CHECK-DAG: %[[K:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#2)[%[[LB2]], %[[STEP2]]]
+// CHECK-DAG: %[[J:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#1)[%[[LB1]], %[[STEP1]]]
+// CHECK-DAG: %[[I:.+]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>(%[[DELINEARIZE]]#0)[%[[LB0]], %[[STEP0]]]
// CHECK: %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]])
// CHECK: scf.yield %[[USE]]
// CHECK: return %[[RESULT]]
@@ -201,8 +191,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
// CHECK: scf.for
-// CHECK: arith.remsi
-// CHECK: arith.divsi
+// CHECK: affine.delinearize_index
// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
// CHECK-NOT: scf.for
// CHECK: transform.named_sequence
@@ -245,8 +234,7 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
// CHECK: scf.for
-// CHECK: arith.remsi
-// CHECK: arith.divsi
+// CHECK: affine.delinearize_index
// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
// CHECK-NOT: scf.for
// CHECK: transform.named_sequence
@@ -289,13 +277,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]]
-// CHECK: arith.subi
-// CHECK: arith.ceildivsi
-// CHECK: arith.subi
-// CHECK: arith.ceildivsi
+// CHECK-NOT: affine.delinearize_index
// CHECK: scf.for
-// CHECK: arith.remsi
-// CHECK: arith.divsi
+// CHECK: affine.delinearize_index
// CHECK-NOT: scf.for
// CHECK: transform.named_sequence
@@ -337,10 +321,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: %[[ARG2:.+]]: index)
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK: %[[UB:.+]] = arith.muli %[[ARG1]], %[[ARG2]]
+// CHECK: %[[UB:.+]] = affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%[[ARG1]], %[[ARG2]]]
// CHECK: scf.for %[[IV:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
-// CHECK: %[[IV1:.+]] = arith.remsi %[[IV]], %[[ARG2]]
-// CHECK: %[[IV2:.+]] = arith.divsi %[[IV]], %[[ARG2]]
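+// CHECK: %[[DELINEARIZE:.+]]:2 = affine.delinearize_index %[[IV]] into (%[[ARG1]], %[[ARG2]])
-// CHECK: "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[IV2]], %[[C0]], %[[IV1]])
+// CHECK: "some_use"(%{{[a-zA-Z0-9]+}}, %[[C0]], %[[C0]], %[[DELINEARIZE]]#0, %[[C0]], %[[DELINEARIZE]]#1)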
// -----
diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
index d1c23d584f92b7..dc4e042a3c4f56 100644
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' --mlir-print-local-scope | FileCheck %s
// CHECK: func @parallel_many_dims() {
func.func @parallel_many_dims() {
@@ -33,14 +33,11 @@ func.func @parallel_many_dims() {
// CHECK-DAG: %[[C12:.*]] = arith.constant 12 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
-// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK: scf.parallel (%[[NEW_I0:.*]]) = (%[[C0]]) to (%[[C4]]) step (%[[C1]]) {
// CHECK: %[[V0:.*]] = arith.remsi %[[NEW_I0]], %[[C2]] : index
// CHECK: %[[I0:.*]] = arith.divsi %[[NEW_I0]], %[[C2]] : index
-// CHECK: %[[V2:.*]] = arith.muli %[[V0]], %[[C10]]
-// CHECK: %[[I3:.*]] = arith.addi %[[V2]], %[[C9]]
+// CHECK: %[[I3:.*]] = affine.apply affine_map<(d0) -> (d0 * 10 + 9)>(%[[V0]])
// CHECK: "magic.op"(%[[I0]], %[[C3]], %[[C6]], %[[I3]], %[[C12]]) : (index, index, index, index, index) -> index
// CHECK: scf.reduce
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
index 4eed61a65aa475..1ef787bec1bb37 100644
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' --mlir-print-local-scope %s | FileCheck %s
func.func @collapse_to_single() {
%c0 = arith.constant 3 : index
@@ -14,20 +14,15 @@ func.func @collapse_to_single() {
}
// CHECK: func @collapse_to_single() {
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
-// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C18:.*]] = arith.constant 18 : index
// CHECK: scf.parallel (%[[NEW_I:.*]]) = (%[[C0]]) to (%[[C18]]) step (%[[C1]]) {
// CHECK: %[[I0_COUNT:.*]] = arith.remsi %[[NEW_I]], %[[C6]] : index
// CHECK: %[[I1_COUNT:.*]] = arith.divsi %[[NEW_I]], %[[C6]] : index
-// CHECK: %[[V0:.*]] = arith.muli %[[I0_COUNT]], %[[C4]]
-// CHECK: %[[I1:.*]] = arith.addi %[[V0]], %[[C7]]
-// CHECK: %[[V1:.*]] = arith.muli %[[I1_COUNT]], %[[C3]]
-// CHECK: %[[I0:.*]] = arith.addi %[[V1]], %[[C3]]
+// CHECK: %[[I1:.*]] = affine.apply affine_map<(d0) -> (d0 * 4 + 7)>(%[[I0_COUNT]])
+// CHECK: %[[I0:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 3)>(%[[I1_COUNT]])
// CHECK: "magic.op"(%[[I0]], %[[I1]]) : (index, index) -> index
// CHECK: scf.reduce
// CHECK-NEXT: }
More information about the Mlir-commits
mailing list