[Mlir-commits] [mlir] 9b47525 - Reorder linalg.conv indexing_maps loop order
Ahmed S. Taei
llvmlistbot at llvm.org
Mon Sep 21 21:54:29 PDT 2020
Author: Ahmed S. Taei
Date: 2020-09-22T04:53:57Z
New Revision: 9b47525824df9be5ae23c39e7ce0d220d12f85e5
URL: https://github.com/llvm/llvm-project/commit/9b47525824df9be5ae23c39e7ce0d220d12f85e5
DIFF: https://github.com/llvm/llvm-project/commit/9b47525824df9be5ae23c39e7ce0d220d12f85e5.diff
LOG: Reorder linalg.conv indexing_maps loop order
Change the indexing map to iterate over the (b, x0, x1, z0, z1, q, k) instead of (b, x0, x1, k, q, z0, z1) to evaluate the convolution expression:
Y[b, x0, x1, k] = sum(W[z0, z1, q, k] * X[b, x0 + z0, x1 + z1, q], z0, z1, q)
This allows llvm auto vectorize to work and has better locality resulting significant performance improvments
Differential Revision: https://reviews.llvm.org/D87781
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
mlir/test/Dialect/Linalg/affine.mlir
mlir/test/Dialect/Linalg/loops.mlir
mlir/test/Dialect/Linalg/tile_conv.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index a6c0d16a9ee2..21208407b327 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -309,10 +309,13 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> {
// This may evolve in the future.
unsigned nWin =
nPar - getNumBatchDimensions() - getNumInputFeatureDimensions();
- SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName());
+ SmallVector<StringRef, 8> iters;
iters.reserve(nPar + nRed + nWin);
- iters.append(nRed, getReductionIteratorTypeName());
+ iters.append(nPar - getNumOutputFeatureDimensions(),
+ getParallelIteratorTypeName());
iters.append(nWin, getWindowIteratorTypeName());
+ iters.append(nRed, getReductionIteratorTypeName());
+ iters.append(getNumOutputFeatureDimensions(), getParallelIteratorTypeName());
return Builder(getContext()).getStrArrayAttr(iters);
}
@@ -337,13 +340,13 @@ def ConvOp : PoolingBase_Op<"conv", [NInputs<2>, NOutputs<1>]> {
// * output filter dimensions (ks with #ks = 1 for now)
auto bs = makeAffineDimExprs(getNumBatchDimensions(), idx, context);
auto xs = makeAffineDimExprs(nWin, idx, context);
- auto ks = makeAffineDimExprs(
- getNumOutputFeatureDimensions(), idx, context);
+ // Window reduction dims: sum_{z[0], ..., z[N-1], q}
+ auto zs = makeAffineDimExprs(nWin, idx, context);
// Non-window reduction dim: sum_{z[0], ..., z[N-1], q}
auto qs = makeAffineDimExprs(
getNumInputFeatureDimensions(), idx, context);
- // Window reduction dims: sum_{z[0], ..., z[N-1], q}
- auto zs = makeAffineDimExprs(nWin, idx, context);
+ auto ks = makeAffineDimExprs(
+ getNumOutputFeatureDimensions(), idx, context);
// Construct the weighedSum expression.
auto ws = weightedPoolingInputIndex(*this, xs, zs);
return Builder(getContext()).getAffineMapArrayAttr({
diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir
index 0df7db06e4c4..7203a4befb8a 100644
--- a/mlir/test/Dialect/Linalg/affine.mlir
+++ b/mlir/test/Dialect/Linalg/affine.mlir
@@ -51,9 +51,9 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECK: %[[X0:.*]] = dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
// No padding needed here; only affine loads.
// CHECK-NEXT: affine.load
@@ -81,10 +81,10 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
+// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index 04ca27b8e175..b8df79e0b743 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -285,9 +285,9 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECKLOOP: %[[X0:.*]] = dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
@@ -303,16 +303,17 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[B]], %[[X0]]) step (%{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel ({{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) {
+// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
+// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?xf32, #[[$strided3D]]>
func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
@@ -330,10 +331,10 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}})
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
@@ -352,18 +353,19 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) {
+// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}})
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
+// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[$strided4D]]>
func @conv_padding(%arg0: memref<?x?x?x?xf32>,
%arg1: memref<?x?x?x?xf32>,
@@ -387,10 +389,10 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
@@ -413,21 +415,22 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref<?x?x?x?xf32>
// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref<?x?x?x?xf32>
// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref<?x?x?x?xf32>
-// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
-// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
-// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]]) step (%{{.*}}, %{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[K]]) step (%{{.*}}) {
+// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
+// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
+// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
func @pooling_max(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xi32>,
diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir
index 3b76f8a3139c..a57aadb903f3 100644
--- a/mlir/test/Dialect/Linalg/tile_conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,0,4" | FileCheck %s -check-prefix=TILE-23004
// TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s1)>
More information about the Mlir-commits
mailing list