[Mlir-commits] [mlir] cb2e651 - [mlir][linalg] Fix incorrect bound calculation for tiling conv

Thu Sep 30 10:54:49 PDT 2021

Author: Lei Zhang
Date: 2021-09-30T13:50:57-04:00
New Revision: cb2e6518000c7e1c5c2244592457afe4a97827e7

URL: https://github.com/llvm/llvm-project/commit/cb2e6518000c7e1c5c2244592457afe4a97827e7
DIFF: https://github.com/llvm/llvm-project/commit/cb2e6518000c7e1c5c2244592457afe4a97827e7.diff

LOG: [mlir][linalg] Fix incorrect bound calculation for tiling conv

For convolution, the input window dimension's access affine map
is of the form `(d0 * s0 + d1)`, where `d0`/`d1` is the output/
filter window dimension, and `s0` is the stride.

When tiling, https://reviews.llvm.org/D109267 changed how the
way dimensions are acquired. Instead of directly querying using
`*.dim` ops on the original convolution op, we now get it by
applying the access affine map to the loop upper bounds. This
is fine for dimensions having single-dimension affine maps,
like matmul, but not for convolution input. It will cause
incorrect compuation and out of bound. A concrete example, say
we have 1x225x225x3 (NHWC) input, 3x3x3x32 (HWCF) filter, and
1x112x112x3 (NHWC) output with stride 2, (112 * 2 + 3) would be
227, which is different from the correct input window dimension
size 225.

Instead, we should first calculate the max indices for each loop,
and apply the affine map to them, and then plus one to get the
dimension size. Note this makes no difference for matmul-like
ops given they will have `d0 - 1 + 1` effectively.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D110849

Added: 
    

Modified: 
    mlir/lib/Dialect/Linalg/Utils/Utils.cpp
    mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
    mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
    mlir/test/Dialect/Linalg/tile-conv.mlir
    mlir/test/Dialect/Linalg/tile-simple-conv.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index b7a2becefa77f..7caef6bd399fe 100644

--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -637,14 +637,33 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                               << ", size: " << size
                               << ": make sure in bound with affine.min\n");
+
       AffineExpr dim0, dim1, dim2;
       bindDims(builder.getContext(), dim0, dim1, dim2);
-      // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
-      AffineMap minMap =
-          AffineMap::inferFromExprList(
-              ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+
+      // Get the dimension size for this dimension. We need to first calculate
+      // the max index and then plus one. This is important because for
+      // convolution ops, we have its input window dimension's affine map of the
+      // form `(d0 * s0 + d1)`, where `d0`/`d1 is an output/filter window
+      // dimension and `s0` is stride. Directly use the dimension size of
+      // output/filer window dimensions will cause incorrect calculation.
+      AffineMap minusOneMap =
+          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
               .front();
-      Value d = applyMapToValues(builder, loc, m, ubs).front();
+      AffineMap plusOneMap =
+          AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
+              .front();
+      auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
+        return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
+            .getResult();
+      }));
+      Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
+      Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
+
+      // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
+      AffineMap minMap = AffineMap::inferFromExprList(
+                             {ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
+                             .front();
       SmallVector<Value, 4> operands{size, d, offset};
       fullyComposeAffineMapAndOperands(&minMap, &operands);
       canonicalizeMapAndOperands(&minMap, &operands);

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
index 8b430134eb2fd..f53e1708986fd 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
@@ -233,7 +233,7 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
 // -----
 
 //  CHECK-DAG:  #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
-//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)>
+//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>
 //  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)>
 #map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
 #map1 = affine_map<(d0, d1) -> (d0, d1)>
@@ -245,13 +245,13 @@ func @fuse_non_rectangular(%arg0: tensor<10x18xf32>,
   %cst = constant 0.000000e+00 : f32
   %0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32>
 
-  //      CHECK:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
-  //      CHECK:    scf.for %[[IV1:[0-9a-zA-Z]*]] =
+  //      CHECK:  scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4
+  //      CHECK:    scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 step %c5
 
   // Compute producer on a hyper rectangular bounding box. Along the second dimenson,
-  // the offset is set to the sum of the induction variables and the upper bound
-  // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes)
-  // minus the induction variables.
+  // the offset is set to the sum of the induction variables, and the upper bound
+  // to either 8 (tile size) or 17 (sum of max indices (9+7) then + 1) minus the
+  // induction variables.
   //      CHECK:      %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]]
   //      CHECK:      %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]]
   //      CHECK:      %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]]

diff  --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index 4aef50e6c96ba..324da1086af0d 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -203,7 +203,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
 // CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)>
 // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
 // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)>
+// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)>
 // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
 // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
 // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>

diff  --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
index eb4124fbb03a9..b4ee26a15ba5c 100644
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
 
 // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
-// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)>
+// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)>
 // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
 // TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
 // TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>

diff  --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
index b25ad22cd309d..9d3e0a5cd745c 100644
--- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
@@ -1,8 +1,8 @@
 // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s
 
 //  CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)>
-//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)>
+//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)>
 //  CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
 //  CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>